Repository 'rsem'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/rsem

Changeset 0:ca988deacfd1 (2014-02-07)
Next changeset 1:6c4337536e17 (2014-02-07)
Commit message:
Uploaded
added:
datatypes_conf.xml
extract_transcript_to_gene_map_from_trinity.xml
rsem.py
rsem_calculate_expression.xml
rsem_prepare_reference.xml
test-data/._RSEM_ref_reference.zip
test-data/._rsem_sample.gene_abundances.tabular
test-data/._rsem_sample.isoform_abundances.tabular
test-data/._rsem_sample.rsem_log.txt
test-data/._trinity_assembled_transcripts.fasta
test-data/._trinity_gene_map.tabular
test-data/RSEM_ref_reference.zip
test-data/ref.fasta
test-data/ref.gtf
test-data/rsem_sample.gene_abundances.tabular
test-data/rsem_sample.isoform_abundances.tabular
test-data/rsem_sample.rsem_log.txt
test-data/test.fq
test-data/trinity_assembled_transcripts.fasta
test-data/trinity_gene_map.tabular
tool-data/rsem_indices.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r ca988deacfd1 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="rsem.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="rsem_ref" type="galaxy.datatypes.rsem:RsemReference" display_in_upload="True"/>
+    </registration>
+</datatypes>
+
b
diff -r 000000000000 -r ca988deacfd1 extract_transcript_to_gene_map_from_trinity.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_transcript_to_gene_map_from_trinity.xml Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,41 @@
+<tool id="extract_transcript_to_gene_map_from_trinity" name="RSEM trinity fasta to gene map" version="1.1.17">
+  <description>extract transcript to gene map from trinity</description>
+  <requirements>
+    <requirement type="package" version="1.1.17">rsem</requirement>
+  </requirements>
+  <command>
+    extract-transcript-to-gene-map-from-trinity $trinity_fasta_file $map_file
+  </command>
+
+  <inputs>
+    <param name="trinity_fasta_file" type="data" format="fasta" label="Trinity fasta file" />
+  </inputs>
+  <stdio>
+    <exit_code range="1:"  level="fatal" description="Error Running RSEM" />
+  </stdio>
+  <outputs>
+    <data format="tabular" name="map_file" label="${tool.name} on ${on_string}: gene map file"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="trinity_fasta_file" value="trinity_assembled_transcripts.fasta" ftype="fasta"/>
+      <output name="map_file" file="trinity_gene_map.tabular"/>
+    </test>
+  </tests>
+  <help>
+  
+
+RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/
+
+NAME
+    extract-transcript-to-gene-map-from-trinity
+
+SYNOPSIS
+    extract-transcript-to-gene-map-from-trinity trinity_fasta_file map_file
+
+DESCRIPTION
+    Generates a gene map file (map_file) from a Trinity fasta file.

+
+  </help> 
+</tool> 
b
diff -r 000000000000 -r ca988deacfd1 rsem.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rsem.py Fri Feb 07 08:07:29 2014 -0500
[
@@ -0,0 +1,82 @@
+"""
+RSEM datatypes
+"""
+import os,os.path,re,sys
+import galaxy.datatypes.data
+from galaxy.datatypes.images import Html
+from galaxy.datatypes.metadata import MetadataElement
+
+class RsemReference( Html ):
+    """Class describing an RSEM reference"""
+    MetadataElement( name='reference_name', default=None, desc='RSEM Reference Name', readonly=True, visible=True, no_value=None )
+
+    file_ext = 'rsem_ref'
+    is_binary = True
+    composite_type = 'auto_primary_file'
+    allow_datatype_change = False
+
+    def generate_primary_file( self, dataset = None ):
+        """
+        This is called only at upload to write the html file
+        cannot rename the datasets here - they come with the default unfortunately
+        """
+        return '<html><head></head><body>AutoGenerated Primary File for RSEM Reference Composite Dataset</body></html>'
+
+    def regenerate_primary_file(self,dataset):
+        """
+        cannot do this until we are setting metadata
+        """
+        refname = dataset.metadata.reference_name
+        flist = os.listdir(dataset.extra_files_path)
+        rval = ['<html><head><title>RSEM Reference %s</title></head><p/>Comprises the following files:<p/><ul>' % (refname)]
+        for i,fname in enumerate(flist):
+            sfname = os.path.split(fname)[-1]
+            rval.append( '<li><a href="%s">%s</a>' % ( sfname, sfname ) )
+        rval.append( '</ul></html>' )
+        f = file(dataset.file_name,'w')
+        f.write("\n".join( rval ))
+        f.write('\n')
+        f.close()
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek  = "RSEM Reference (%s)" % ( dataset.metadata.reference_name )
+            dataset.blurb =  "RSEM Reference (%s)" % ( dataset.metadata.reference_name )
+        else:
+            dataset.peek = 'RSEM Reference (%s) does not exist' % ( dataset.metadata.reference_name )
+            dataset.blurb = 'RSEM Reference (%s) purged from disk' % ( dataset.metadata.reference_name )
+
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "RSEM Reference"
+
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        """
+        Expecting files:
+        extra_files_path/<reference_name>.grp
+        extra_files_path/<reference_name>.ti
+        extra_files_path/<reference_name>.seq
+        extra_files_path/<reference_name>.transcripts.fa
+        Optionally includes files:
+        extra_files_path/<reference_name>.chrlist
+        extra_files_path/<reference_name>.idx.fa
+        extra_files_path/<reference_name>.4.ebwt
+        extra_files_path/<reference_name>.3.ebwt
+        extra_files_path/<reference_name>.2.ebwt
+        extra_files_path/<reference_name>.1.ebwt
+        extra_files_path/<reference_name>.rev.2.ebwt
+        extra_files_path/<reference_name>.rev.1.ebwt
+        """
+        pat = '^(.*)\.grp$'
+        efp = dataset.extra_files_path
+        flist = os.listdir(efp)
+        for i,fname in enumerate(flist):
+          m = re.match(pat,fname)
+          if m: 
+            dataset.metadata.reference_name = m.groups()[0]
+            break
+        self.regenerate_primary_file(dataset)
+
+
b
diff -r 000000000000 -r ca988deacfd1 rsem_calculate_expression.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rsem_calculate_expression.xml Fri Feb 07 08:07:29 2014 -0500
b
b'@@ -0,0 +1,719 @@\n+<tool id="rsem_calculate_expression" name="RSEM calculate expression" version="1.1.17">\n+  <description>RNA-Seq by Expectation-Maximization</description>\n+  <requirements>\n+    <requirement type="package" version="1.1.17">rsem</requirement>\n+    <requirement type="package" version="0.1.19">samtools</requirement>\n+    <requirement type="package" version="1.0.0">bowtie</requirement>\n+  </requirements>\n+  <command>\n+    rsem-calculate-expression \n+    ## --tag string\n+    #if $seedlength:\n+      --seed-length $seedlength \n+    #end if\n+    --forward-prob $forward_prob\n+    #if $rsem_options.fullparams == \'fullset\':\n+      ## Fragment info\n+      #if $rsem_options.fragment_length_mean:\n+        --fragment-length-mean $rsem_options.fragment_length_mean\n+      #end if\n+      #if $rsem_options.fragment_length_min:\n+        --fragment-length-min $rsem_options.fragment_length_min\n+      #end if\n+      #if $rsem_options.fragment_length_sd:\n+        --fragment-length-sd $rsem_options.fragment_length_sd\n+      #end if\n+      #if $rsem_options.fragment_length_max:\n+        --fragment-length-max $rsem_options.fragment_length_max\n+      #end if\n+      ## RSPD\n+      #if $rsem_options.rspd.estimate == \'yes\':\n+        --estimate-rspd\n+        #if $rsem_options.rspd.num_rspd_bins:\n+          --num-rspd-bins $rsem_options.rspd.num_rspd_bins\n+        #end if\n+      #end if\n+      ## Calculate 95% credibility intervals and posterior mean estimates. 
\n+      #if $rsem_options.useci.ci == \'yes\':\n+        --calc-ci\n+        #if $rsem_options.useci.cimem:\n+          --ci-memory $rsem_options.useci.cimem\n+        #end if\n+      #end if\n+    #end if\n+    ## --num-threads $GALAXY_SLOTS\n+    #if $input.format != \'bam\' and $input.bowtie_options.fullparams == \'fullset\':\n+      ## Bowtie params\n+      #if $bowtie_options.bowtie_e:\n+      --bowtie-e $bowtie_options.bowtie_e\n+      #end if\n+      #if $bowtie_options.bowtie_m:\n+      --bowtie-m $bowtie_options.bowtie_m\n+      #end if\n+      #if $bowtie_options.bowtie_n:\n+      --bowtie-n $bowtie_options.bowtie_n\n+      #end if\n+    #end if\n+    ## Outputs\n+    #if $rsem_outputs.result_bams == \'none\':\n+      --no-bam-output\n+    #else\n+      #if $rsem_outputs.result_bams == \'both\':\n+        --output-genome-bam\n+      #end if\n+      $rsem_outputs.sampling_for_bam\n+    #end if\n+    ## Input data\n+    #if $input.format=="fastq"\n+      $input.fastq_select\n+      #if $input.fastq.matepair=="single": \n+        $input.fastq.singlefastq\n+      #elif $input.fastq.matepair=="paired":\n+        --paired-end\n+        $input.fastq.fastq1\n+        $input.fastq.fastq2\n+      #end if\n+    #elif $input.format=="fasta"\n+      --no-qualities\n+      #if $input.fasta.matepair=="single": \n+        $input.fasta.singlefasta\n+      #elif $input.fasta.matepair=="paired":\n+        --paired-end\n+        $input.fasta.fasta1\n+        $input.fasta.fasta2\n+      #end if\n+    #elif $input.format=="sam"\n+      #if $input.matepair=="paired":\n+        --paired-end\n+      #end if\n+      #if $input.rsem_sam._extension == \'sam\':\n+        --sam\n+      #elif $input.rsem_sam._extension == \'bam\':\n+        --bam\n+      #end if\n+      $input.rsem_sam\n+    #end if\n+    ## RSEM reference\n+    #if $reference.refSrc == \'history\':\n+      ${reference.rsem_ref.extra_files_path}/${reference.rsem_ref.metadata.reference_name}\n+    #elif 
$reference.refSrc == \'cached\':\n+      ${reference.index.fields.path}\n+    #end if\n+    ## sample_name: use a hard coded name so we can pull out galaxy outputs\n+    rsem_output\n+    ## direct output into logfile\n+    > $log\n+  </command>\n+  <macros>\n+    <macro name="rsem_options">\n+        <param name="seedlength" type="integer" value="25" optional="true" label="Seed length used by the read aligner" help="Providing the correct value for this parameter is important for RSEM\'s accuracy if the data are single-end reads. RSEM uses this value for Bowtie\'s seed length parameter. The minimum value is 25. (Default:25)">\n+  '..b"f a read. In addition, RSEM pads a\n+        new tag ZW:f:value, where value is a single precision floating\n+        number representing the posterior probability. If an alignment is\n+        spliced, a XS:A:value tag is also added, where value is either '+'\n+        or '-' indicating the strand of the transcript it aligns to.\n+\n+        'sample_name.genome.sorted.bam' and\n+        'sample_name.genome.sorted.bam.bai' are the sorted BAM file and\n+        indices generated by samtools (included in RSEM package).\n+\n+    sample_name.sam.gz\n+        Only generated when the input files are raw reads instead of SAM/BAM\n+        format files\n+\n+        It is the gzipped SAM output produced by bowtie aligner.\n+\n+    sample_name.time\n+        Only generated when --time is specified.\n+\n+        It contains time (in seconds) consumed by aligning reads, estimating\n+        expression levels and calculating credibility intervals.\n+\n+    sample_name.stat\n+        This is a folder instead of a file. All model related statistics are\n+        stored in this folder. Use 'rsem-plot-model' can generate plots\n+        using this folder.\n+\n+EXAMPLES\n+    Assume the path to the bowtie executables is in the user's PATH\n+    environment variable. 
Reference files are under '/ref' with name\n+    'mouse_125'.\n+\n+    1) '/data/mmliver.fq', single-end reads with quality scores. Quality\n+    scores are encoded as for 'GA pipeline version >= 1.3'. We want to use 8\n+    threads and generate a genome BAM file:\n+\n+     rsem-calculate-expression --phred64-quals \\\n+                               -p 8 \\\n+                               --output-genome-bam \\\n+                               /data/mmliver.fq \\\n+                               /ref/mouse_125 \\\n+                               mmliver_single_quals\n+\n+    2) '/data/mmliver_1.fq' and '/data/mmliver_2.fq', paired-end reads with\n+    quality scores. Quality scores are in SANGER format. We want to use 8\n+    threads and do not generate a genome BAM file:\n+\n+     rsem-calculate-expression -p 8 \\\n+                               --paired-end \\\n+                               /data/mmliver_1.fq \\\n+                               /data/mmliver_2.fq \\\n+                               /ref/mouse_125 \\\n+                               mmliver_paired_end_quals\n+\n+    3) '/data/mmliver.fa', single-end reads without quality scores. We want\n+    to use 8 threads:\n+\n+     rsem-calculate-expression -p 8 \\\n+                               --no-qualities \\\n+                               /data/mmliver.fa \\\n+                               /ref/mouse_125 \\\n+                               mmliver_single_without_quals\n+\n+    4) Data are the same as 1). We want to take a fragment length\n+    distribution into consideration. We set the fragment length mean to 150\n+    and the standard deviation to 35. In addition to a BAM file, we also\n+    want to generate credibility intervals. 
We allow RSEM to use 1GB of\n+    memory for CI calculation:\n+\n+     rsem-calculate-expression --bowtie-path /sw/bowtie \\\n+                               --phred64-quals \\\n+                               --fragment-length-mean 150.0 \\\n+                               --fragment-length-sd 35.0 \\\n+                               -p 8 \\\n+                               --output-genome-bam \\\n+                               --calc-ci \\\n+                               --ci-memory 1024 \\\n+                               /data/mmliver.fq \\\n+                               /ref/mouse_125 \\\n+                               mmliver_single_quals\n+\n+    5) '/data/mmliver_paired_end_quals.bam', paired-end reads with quality\n+    scores. We want to use 8 threads:\n+\n+     rsem-calculate-expression --paired-end \\\n+                               --bam \\\n+                               -p 8 \\\n+                               /data/mmliver_paired_end_quals.bam \\\n+                               /ref/mouse_125 \\\n+                               mmliver_paired_end_quals\n+  </help> \n+</tool> \n"
b
diff -r 000000000000 -r ca988deacfd1 rsem_prepare_reference.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rsem_prepare_reference.xml Fri Feb 07 08:07:29 2014 -0500
[
@@ -0,0 +1,127 @@
+<tool id="rsem_prepare_reference" name="RSEM prepare reference" version="1.1.17">
+  <description></description>
+  <requirements>
+    <requirement type="package" version="1.1.17">rsem</requirement>
+    <requirement type="package" version="1.0.0">bowtie</requirement>
+  </requirements>
+  <command>
+    mkdir $reference_file.extra_files_path &amp;&amp; 
+    cd $reference_file.extra_files_path &amp;&amp; 
+    rsem-prepare-reference 
+    #if $polya.polya_use == 'add':
+      #if $polya.polya_length:
+        --polyA-length $polya.polya_length
+      #end if
+    #elif $polya.polya_use == 'subset':
+      --no-polyA-subset $polya.no_polya_subset
+      #if $polya.polya_length:
+        --polyA-length $polya.polya_length
+      #end if
+    #elif $polya.polya_use == 'none':
+      --no-polyA
+    #end if
+    $ntog
+    #if $transcript_to_gene_map:
+      --transcript-to-gene-map $transcript_to_gene_map
+    #end if
+    #if $reference.ref_type == 'transcripts':
+      $reference.reference_fasta_file
+    #else:
+      --gtf $reference.gtf
+      $reference.reference_fasta_file
+    #end if
+    $reference_name
+  </command>
+  <inputs>
+    <conditional name="reference">
+      <param name="ref_type" type="select" label="Reference transcript source">
+        <option value="transcripts">transcript fasta</option>
+        <option value="genomic">reference genome and gtf</option>
+      </param>
+      <when value="transcripts">
+        <param name="reference_fasta_file" type="data" format="fasta" label="reference fasta file" 
+               help="The files should contain the sequences of transcripts."/>
+      </when>
+      <when value="genomic">
+        <param name="reference_fasta_file" type="data" format="fasta" label="reference fasta file" 
+               help="The file should contain the sequence of an entire genome."/>
+        <param name="gtf" type="data" format="gtf" label="gtf" 
+               help="extract transcript reference sequences using the gene annotations specified in this GTF" />
+      </when>
+    </conditional>
+    <param name="transcript_to_gene_map" type="data" format="tabular" optional="true" label="Map of gene ids to transcript (isoform) ids" >
+      <help>
+        Each line should be of the form: gene_id transcript_id (with the two fields separated by a tab character)
+        The map can be obtained from the UCSC table browser  
+          group: Genes and Gene Prediction Tracks 
+          table: knownIsoforms
+        Without a map:
+          If a reference genome and gtf is used, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file. 
+          Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene.
+      </help>
+    </param>
+    <param name="reference_name" type="text" value="rsem_ref_name" label="reference name">
+      <help>A one-word name for this RSEM reference containing only letters, digits, and underscore characters</help>
+      <validator type="regex" message="Use only letters, digits, and underscore characters">^\w+$</validator>
+    </param>
+    <conditional name="polya">
+      <param name="polya_use" type="select" label="PolyA ">
+        <option value="add" selected="true">Add poly(A) tails to all transcripts</option>
+        <option value="subset">Exclude poly(A) tails from selected transcripts</option>
+        <option value="none">Do not add poly(A) tails to any transcripts</option>
+      </param>
+      <when value="add">
+        <param name="polya_length" type="integer" value="125" optional="true" label="The length of the poly(A) tails to be added. (Default: 125)">
+          <validator type="in_range" message="must be positive " min="1"/>
+        </param>
+      </when>
+      <when value="subset">
+        <param name="no_polya_subset" type="data" format="tabular" optional="true" label="List of transcript IDs (one per line) that should not have polyA tails added."/> 
+        <param name="polya_length" type="integer" value="125" optional="true" label="The length of the poly(A) tails to be added. (Default: 125)">
+          <validator type="in_range" message="must be positive " min="1"/>
+        </param>
+      </when>
+      <when value="none"/>
+    </conditional>
+    <param name="ntog" type="boolean" truevalue="--no-ntog" falsevalue="" checked="false" label="Disable the conversion of 'N' characters to 'G' characters in the reference sequences" help="Bowtie uses the automatic N to G conversion to align against all positions in the reference."/>
+  </inputs>
+  <stdio>
+    <exit_code range="1:"  level="fatal" description="Error Running RSEM" />
+  </stdio>
+  <outputs>
+    <data format="rsem_ref" name="reference_file" label="RSEM ${reference_name} reference"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="ref_type" value="genomic"/>
+      <param name="reference_fasta_file" value="ref.fasta" ftype="fasta"/>
+      <param name="gtf" value="ref.gtf" ftype="gtf"/>
+      <param name="reference_name" value="ref"/>
+      <output name="rsem_ref">
+        <assert_contents>
+          <has_text text="ref.grp" />
+        </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help>
+
+RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/
+
+NAME
+    rsem-prepare-reference
+
+SYNOPSIS
+    rsem-prepare-reference [options] reference_fasta_file(s) reference_name
+
+DESCRIPTION
+    The rsem-prepare-reference program extracts/preprocesses the reference sequences and builds Bowtie indices using default parameters. 
+    This program is used in conjunction with the 'rsem-calculate-expression' program.
+
+INPUTS
+    A fasta file of transcripts
+    or
+    A genome sequence fasta file and a GTF gene annotation file.  (When using UCSC data, include the related knownIsoforms.txt)
+
+  </help> 
+</tool> 
b
diff -r 000000000000 -r ca988deacfd1 test-data/._RSEM_ref_reference.zip
b
Binary file test-data/._RSEM_ref_reference.zip has changed
b
diff -r 000000000000 -r ca988deacfd1 test-data/._rsem_sample.gene_abundances.tabular
b
Binary file test-data/._rsem_sample.gene_abundances.tabular has changed
b
diff -r 000000000000 -r ca988deacfd1 test-data/._rsem_sample.isoform_abundances.tabular
b
Binary file test-data/._rsem_sample.isoform_abundances.tabular has changed
b
diff -r 000000000000 -r ca988deacfd1 test-data/._rsem_sample.rsem_log.txt
b
Binary file test-data/._rsem_sample.rsem_log.txt has changed
b
diff -r 000000000000 -r ca988deacfd1 test-data/._trinity_assembled_transcripts.fasta
b
Binary file test-data/._trinity_assembled_transcripts.fasta has changed
b
diff -r 000000000000 -r ca988deacfd1 test-data/._trinity_gene_map.tabular
b
Binary file test-data/._trinity_gene_map.tabular has changed
b
diff -r 000000000000 -r ca988deacfd1 test-data/RSEM_ref_reference.zip
b
Binary file test-data/RSEM_ref_reference.zip has changed
b
diff -r 000000000000 -r ca988deacfd1 test-data/ref.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.fasta Fri Feb 07 08:07:29 2014 -0500
b
b'@@ -0,0 +1,10000 @@\n+>1\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN'..b'CCTATTCTAGGCTCCAGGAGCGAGACCTCCTCCATGTGCCTGCTAACGTCAGTGCCACGGGC\n+CCTCTTTGGTTCTCCTGCCCACCTTCTCTGACTCTGCCCAACACCCACGGGCCAGAGGCCCTTTTCCTTCTTGGCACTTC\n+TCGGTGTTGTAGCAGGATGAGCCACGGACAAAACCCCTCAGACGCCGGGTTAAGGAATGATTTGGTTTTATTCGGCCAGG\n+AGCTTCAGCGGACTCAAGTCTCAAGAACCGAACTCTCTGAAGACAGAGTTCCTGGCCCTTTTAAGGGTTTACAACCTTTA\n+GGTTCCACGTGAAAGGGTCGTGATAGATTGAGAGCACATGCGTTTAGAGTGGGGTGGGGGGGTTAATCTTTTAACCTCAG\n+GCCTGATCATCAGGGGCACCAGCTGGTCTTGCCACTGACTTCATTCCTGTTGTTTTTCAACTTTTACTTCCTCCTCCTCT\n+TCAGACAGGAGACAGTAAGAGAAATGGCCTCTCTCCTCAGCGGCACAATGGACTTTAGACATCGCGCCTGTTTTGTACTC\n+TCCAAGCCGGACTTCTCTACTCGCTGGCCTGGCGCTGATGCCCAGAGGCCCCCTGTGCTTGTGGTCGTGGCCTTAGACAG\n+TTCCCTGCCCTTTGTAAGTACTAACTTAACTCTTGAATGTGGACGTTTTAAGCTGACTTTTCTTCTCAGCCCTCTGTAAG\n+AGTTTCGAATCTCCCCATAGCAATCAATGTAATTGTTTTCACCCAGGGTGTTGGTCCTGGCTGCTTCGATTTTAAAGTAA\n+TCTCTCATTTTGCTGAGGAGAATTTATTAGAGTAAATGACTGTTTTCACCTGTCATGTCACATTAAAATGTGGAGAGAAC\n+TGGTTGTAGGGGCAGTCAAACTGGCTGTGAGACCCTTCGAGCAGCCATGCATCTCCTGGACAGCTCGAACACTGATCAGG\n+GAGGAGATACTGAAAATAAGAAATAAGAATTGCCTTTATATGTGTGTATGAGTATAGACACATACATCATGTACATGTAT\n+GTGTGTATGTGCATATGTGTGCATGTGAAGATGGATAGATGATAGATACATAGGTGATTTATGTATAGGTAGATGACAGA\n+TATTGGTAATTAATTGATAGTAGATAGGTTGATAGTATATAGGTGATTGATAGATGGATGATAGATCATTGATAGATGAT\n+AGGTAGATAGTAGATAGATTATAGGTAGATAAATAGATGGATGATGGATGGATGATAGATACTTGATTGATAGATGATAG\n+GTAGATAGTAGATAGATGATAGATAATGATAGATGATAGCTAGATAGTAGATAGATGTGATAGGTAGATGGTAGATAGAG\n+ATGATGATAGATGGATGATAGATGGATGGATGGATGATAGGTGATTGATAGATGGTAGATAGGTGGATGGTAGAGATGGT\n+AGATAAATAGATGGATGATGGATGGATGATAGAGAGCTGATAGTAGATAAGGTAGATAGTAGATAGATGATAGATGGATG\n+GATGATAGATGATTGATAGAGAGATAGAGAGAGAAGGATGATAGACTGAGCATCATAACTTCACTGAATGCTACCAGTTG\n+GATTCAAAAGTCAGCAAACTGAAACGTCGAATTTCAGCCAGCTATCAGAAATATTGTAATCTGACAACAAAAGATAAACA\n+AGGGAACATGGAAGTGGCAGCTATTCCAGAAGCTGCACCTCTCAGCATGGGCACAGAACCACAGAAGTGGAGCCAGAGTC\n+CTCTCCACATCTCCTCTGGGTCGGCGTTATCATTGTCACAATGGGTGGAGGCTGGACCTTGTGAAGGCAAAGGCATACGG\n+TGCTTTCTTGCTTCTGCTGTTTGGTGACTCCCTTACTCATTCCCTCTGCGGTG
GCTGAGGGGCTGATGATCAGTTCCTCT\n+GCCCAGAATGCCCTGACCATGCTGGGTGTCTGTCCTCCCTAGACCTGGGTCTGCAGGATGACAAGGACCCTGCAGCAGAA\n+ACTCCTCTGTCTCTTCCCTGAGCACCGTCTTCCATGCTGCCTGACTATGGTGCTGGTTCCCCTCTGATATGGAGATTGGT\n+GGAGAATGACCAGGATATTTTATGTGGCAATGGGGGTTGAAGCCCACTTCAAAATGGCTTAGGATAAGTGACCAAAAGTC\n+CCAGTGTGAGCATCAGGCATGGCTGGATCCAGGTGCTCAGCACCATCTGGAAATTTGTCTCCATCTTTGACTCTGCTGTC\n+TCTGTGTGGGCTTCATTTCTAAGATGGTTTTCTCTGCACAGCTGTAACAAAGCTCAGGGCTTGCATGATGCTTTGCAGCC\n+AGTACATCAGAGAGGACTTTCCCTCAAGACTTCTGGCAAAATTTGGGGTAACATGCTGTTCTTGCTGGGGTCAGGAGCCA\n+TCTTGCACCAATCTCAGAGGCCAGGGGATGGAGAACTCTCCTAGGGTAGTCCCAGGTTCTGTGCCTCCCCGGAAAACACA\n+AGGTCGGAGCCTCCCCATCCAAATTAATGCTCAGACTGTCAGGGAGGAATGACTTCCAAAGAAAAACCAAGGGCCCTTAT\n+GAAGAAAAGGAAGGGGTGCACCTACAGTTGCCAGTTCTGTTTGGAAGTGAGGGAGGGGAGGGCAGATTCCCTTGGCTTGT\n+TCCCTAGGACAGCTGTACTTAGGTTTTACTACACAGTCCACTGAAAATGCTTCTCTCATTAGTGTTGTGCCTCTCTCATG\n+CCTCTTTCCTTGGATAAAGAGCAAGCACATTACGAGCCACACTTGGTGCTGGTGCTGTGGCATGCAACACTACCTAATGC\n+GAGAGAAAGATGTGAGCAAATATCCGGAATATACAAATATAATACAAAAATACAAATATAATCAGTTATACAGCAAACAG\n+CTGTGGGAACAGCAGCTCTGCTTGCAGGGTTGGGTAGTGGAAGGTTTCAGAGTTGATAGCATTTGGATTGGGCTTTAAGG\n+TATGACTAGGAGCTTACCAGATAGAGAAGTGGTGTTTGGACGTTGTATGAGAAGGAAACACAATCTGAGAAGGTGTACCA\n+TCCTAAGTGAATCTGTCACAGAAGCGAGAAGATGCTGTGTAAACCGGAATAATGCTTGTGTGCATGGATCTTGATCCCTT\n+CGTGAGCAATAAATGATTTTGTGAATAAATTGAAGAATGCTGAATTAGATCATAGGGTGTGTTTGCCGGGGTGGGGAGAG\n+AGGAGAGCGTCGAAACTGGGGAAGTGGGGAACCAGAGATGGCAAAGCCTGGGACTTCCAGCTTCACCCACAGGACGCGAG\n+GAGCCTCATCAGTATCACCCGCACAAGCAAGCTCCCATGAAGGCAGCGGTGTCCAGGCTCTGTCGCGTCCTTTCTTTGCA\n+GCCTGGGCCTTGGTTTCTTGCAGGTGCTGTGCCCAAAGGTTAGATTGACAGATGTGGCCGTGTCTATCAGGGAACATGGA\n+CTGAGGCCAGTGCTTTCAACCATACTCACTTTTCACTGTGTTCAGAGGGGGTGGGAAATCTCCCCTAAGGAGGAGATACG\n+ACGTGTGCAGATTGAGAGCTGGGACCACACGGATTCCTCCAGGGGCAAGGGCTGGTGACCTTGCAGTAACTTCCAGAGGC\n+CTCAGCACCCTCCTCTGCACAATGGTGGTGGGCTAGCCAGGGCACCACAGAAAGGGCCAGCAAGGCACACCCTCGTCTAT\n+TGCCCAGGCACACATGGGCATCCATGACAGCCATGCATGTGTTAGAGCCCGACCAGAGCTGCAGCCTCCTTTGGCTGAGG\n+TTGGAAAAGCGGAGATGTGTTCACAGTTGGAGAATTGGTATGCCTTACCCAAGTGACATCC
CTGTCCTTGGAAACTTATT\n+CTGGGGTCACAGAATTAATTCCCCAGGATTGGGTGTCACGAGGAGGTGGTTCCTGTGGCTGAAAACCACCAACACCTCCT\n'
b
diff -r 000000000000 -r ca988deacfd1 test-data/ref.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.gtf Fri Feb 07 08:07:29 2014 -0500
b
b'@@ -0,0 +1,339 @@\n+1\tprocessed_transcript\texon\t11869\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002234944";\n+1\tprocessed_transcript\texon\t12613\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00003582793";\n+1\tprocessed_transcript\texon\t13221\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002312635";\n+1\tunprocessed_pseudogene\texon\t11872\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00002234632";\n+1\tunprocessed_pseudogene\texon\t12613\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00003608237";\n+1\tunprocessed_pseudogene\texon\t13225\t14412\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00002306041";\n+1\tunprocessed_pseudogene\texon\t11874\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id "ENSE00002269724";\n+1\tunprocessed_pseudogene\texon\t12595\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id 
"ENSE00002270865";\n+1\tunprocessed_pseudogene\texon\t13403\t13655\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id "ENSE00002216795";\n+1\tunprocessed_pseudogene\texon\t13661\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "4"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id "ENSE00002303382";\n+1\ttranscribed_unprocessed_pseudogene\texon\t12010\t12057\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001948541";\n+1\ttranscribed_unprocessed_pseudogene\texon\t12179\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001671638";\n+1\ttranscribed_unprocessed_pseudogene\texon\t12613\t12697\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001758273";\n+1\ttranscribed_unprocessed_pseudogene\texon\t12975\t13052\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "4"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001799933";\n+1\ttranscribed_unprocessed_pseudogene\texon\t13221\t13374\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "5"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001746346";\n+1\ttranscribed_unprocessed_pseudogene\texon\t13453\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "6"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id 
"ENSE00001863096";\n+1\tunprocessed_pseudogene\texon\t29321\t29370\t.\t-\t.\tgene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "1"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; '..b'ipt_name "RP11-206L10.9-016"; exon_id "ENSE00001615350";\n+1\tprocessed_transcript\texon\t745447\t745541\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000447500"; exon_number "1"; gene_name "RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-002"; exon_id "ENSE00001651951";\n+1\tprocessed_transcript\texon\t743954\t744003\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000447500"; exon_number "2"; gene_name "RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-002"; exon_id "ENSE00001782999";\n+1\tprocessed_transcript\texon\t741179\t741271\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000447500"; exon_number "3"; gene_name "RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-002"; exon_id "ENSE00001640641";\n+1\tprocessed_transcript\texon\t736259\t736543\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000447500"; exon_number "4"; gene_name "RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-002"; exon_id "ENSE00001716345";\n+1\ttranscribed_unprocessed_pseudogene\texon\t745447\t745541\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000590817"; exon_number "1"; gene_name "RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-001"; exon_id "ENSE00001651951";\n+1\ttranscribed_unprocessed_pseudogene\texon\t743954\t744003\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000590817"; exon_number "2"; gene_name "RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-001"; exon_id "ENSE00001782999";\n+1\ttranscribed_unprocessed_pseudogene\texon\t741179\t741271\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000590817"; exon_number "3"; gene_name 
"RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-001"; exon_id "ENSE00001640641";\n+1\ttranscribed_unprocessed_pseudogene\texon\t736259\t736543\t.\t-\t.\tgene_id "ENSG00000230092"; transcript_id "ENST00000590817"; exon_number "4"; gene_name "RP11-206L10.8"; gene_biotype "pseudogene"; transcript_name "RP11-206L10.8-001"; exon_id "ENSE00001716345";\n+1\tprotein_coding\texon\t739121\t739137\t.\t-\t.\tgene_id "ENSG00000269831"; transcript_id "ENST00000599533"; exon_number "1"; gene_name "AL669831.1"; gene_biotype "protein_coding"; transcript_name "AL669831.1-201"; exon_id "ENSE00003063549";\n+1\tprotein_coding\tCDS\t739121\t739137\t.\t-\t0\tgene_id "ENSG00000269831"; transcript_id "ENST00000599533"; exon_number "1"; gene_name "AL669831.1"; gene_biotype "protein_coding"; transcript_name "AL669831.1-201"; protein_id "ENSP00000468859";\n+1\tprotein_coding\texon\t738788\t738812\t.\t-\t.\tgene_id "ENSG00000269831"; transcript_id "ENST00000599533"; exon_number "2"; gene_name "AL669831.1"; gene_biotype "protein_coding"; transcript_name "AL669831.1-201"; exon_id "ENSE00003084653";\n+1\tprotein_coding\tCDS\t738788\t738812\t.\t-\t1\tgene_id "ENSG00000269831"; transcript_id "ENST00000599533"; exon_number "2"; gene_name "AL669831.1"; gene_biotype "protein_coding"; transcript_name "AL669831.1-201"; protein_id "ENSP00000468859";\n+1\tprotein_coding\texon\t738532\t738618\t.\t-\t.\tgene_id "ENSG00000269831"; transcript_id "ENST00000599533"; exon_number "3"; gene_name "AL669831.1"; gene_biotype "protein_coding"; transcript_name "AL669831.1-201"; exon_id "ENSE00003138540";\n+1\tprotein_coding\tCDS\t738532\t738618\t.\t-\t0\tgene_id "ENSG00000269831"; transcript_id "ENST00000599533"; exon_number "3"; gene_name "AL669831.1"; gene_biotype "protein_coding"; transcript_name "AL669831.1-201"; protein_id "ENSP00000468859";\n+1\tprocessed_transcript\texon\t752753\t753092\t.\t-\t.\tgene_id "ENSG00000240453"; transcript_id "ENST00000435300"; exon_number "1"; gene_name 
"RP11-206L10.10"; gene_biotype "processed_transcript"; transcript_name "RP11-206L10.10-001"; exon_id "ENSE00001746491";\n+1\tprocessed_transcript\texon\t745489\t745550\t.\t-\t.\tgene_id "ENSG00000240453"; transcript_id "ENST00000435300"; exon_number "2"; gene_name "RP11-206L10.10"; gene_biotype "processed_transcript"; transcript_name "RP11-206L10.10-001"; exon_id "ENSE00001674926";\n'
b
diff -r 000000000000 -r ca988deacfd1 test-data/rsem_sample.gene_abundances.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rsem_sample.gene_abundances.tabular Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,54 @@
+ENSG00000185097 0.00 0 ENST00000332831
+ENSG00000186092 0.00 0 ENST00000335137
+ENSG00000198744 147.00 0.0955859613080923 ENST00000416718
+ENSG00000222623 0.00 0 ENST00000410691
+ENSG00000223181 0.00 0 ENST00000411249
+ENSG00000223659 0.00 0 ENST00000452176
+ENSG00000223972 0.00 0 ENST00000450305,ENST00000456328,ENST00000515242,ENST00000518655
+ENSG00000224813 0.00 0 ENST00000445840
+ENSG00000224956 3.85 0.00103471956451253 ENST00000506640
+ENSG00000225630 176.00 0.0565222831182978 ENST00000457540
+ENSG00000225972 31.00 0.031536864785323 ENST00000416931
+ENSG00000227232 9.00 0.0016492259329919 ENST00000423562,ENST00000438504,ENST00000488147,ENST00000538476,ENST00000541675
+ENSG00000228327 12.56 0.0031549934286337 ENST00000428504
+ENSG00000228463 110.35 0.0265146476181162 ENST00000335577,ENST00000424587,ENST00000442116,ENST00000448958
+ENSG00000229344 232.00 0.118008913390241 ENST00000427426
+ENSG00000229376 0.00 0 ENST00000440782
+ENSG00000229905 0.00 0 ENST00000422528
+ENSG00000230021 52.65 0.0297508289367811 ENST00000414688,ENST00000419394,ENST00000440200,ENST00000441245,ENST00000447954,ENST00000448605
+ENSG00000230092 0.00 0 ENST00000447500,ENST00000590817
+ENSG00000231709 0.00 0 ENST00000417636
+ENSG00000233653 1.80 0.000234465887962401 ENST00000432723
+ENSG00000233750 25.71 0.00216201488099237 ENST00000442987
+ENSG00000235146 0.00 0 ENST00000423796,ENST00000450696
+ENSG00000235249 0.00 0 ENST00000426406
+ENSG00000235373 154.04 0.0101246303660993 ENST00000416385
+ENSG00000236601 0.00 0 ENST00000412666,ENST00000450983
+ENSG00000236679 18.51 0.0180708915534142 ENST00000458203
+ENSG00000236743 0.00 0 ENST00000441866
+ENSG00000237094 68.79 0.0375906700656605 ENST00000419160,ENST00000423728,ENST00000425496,ENST00000431321,ENST00000431812,ENST00000432964,ENST00000440038,ENST00000440163,ENST00000453935,ENST00000455207,ENST00000455464,ENST00000599771,ENST00000601486,ENST00000601814
+ENSG00000237491 3.00 0.0028508085192879 ENST00000358533,ENST00000412115,ENST00000429505,ENST00000434264,ENST00000443772,ENST00000457084,ENST00000585745,ENST00000585768,ENST00000585826,ENST00000586288,ENST00000586928,ENST00000587126,ENST00000587530,ENST00000588951,ENST00000589531,ENST00000589899,ENST00000590848,ENST00000591440,ENST00000591702,ENST00000592547,ENST00000593022
+ENSG00000237613 4.88 0.0029164490537218 ENST00000417324,ENST00000461467
+ENSG00000237683 1.64 0.000199085235466067 ENST00000423372
+ENSG00000237973 483.00 0.102851490150648 ENST00000414273
+ENSG00000238009 100.99 0.0522145032088959 ENST00000453576,ENST00000466430,ENST00000471248,ENST00000477740
+ENSG00000239664 44.28 0.0320323295763644 ENST00000357876,ENST00000440196
+ENSG00000239906 0.00 0 ENST00000493797
+ENSG00000239945 6.36 0.00159599541100089 ENST00000495576
+ENSG00000240361 0.00 0 ENST00000492842
+ENSG00000240409 47.00 0.102222941028288 ENST00000467115
+ENSG00000240453 0.00 0 ENST00000435300
+ENSG00000240618 0.00 0 ENST00000417659
+ENSG00000241599 0.00 0 ENST00000496488
+ENSG00000241670 0.32 0.000330057780272038 ENST00000424429,ENST00000450734
+ENSG00000241860 178.16 0.0259713923786809 ENST00000466557,ENST00000484859,ENST00000490997,ENST00000491962
+ENSG00000243485 0.00 0 ENST00000469289,ENST00000473358,ENST00000607096
+ENSG00000248527 304.00 0.154882179236481 ENST00000514057
+ENSG00000250575 0.00 0 ENST00000514436
+ENSG00000256186 0.00 0 ENST00000540477
+ENSG00000268020 0.00 0 ENST00000594647,ENST00000606857
+ENSG00000268663 0.00 0 ENST00000438434
+ENSG00000268903 0.00 0 ENST00000494149
+ENSG00000269732 0.00 0 ENST00000437905
+ENSG00000269831 19.12 0.0899916575837746 ENST00000599533
+ENSG00000269981 0.00 0 ENST00000595919
b
diff -r 000000000000 -r ca988deacfd1 test-data/rsem_sample.isoform_abundances.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rsem_sample.isoform_abundances.tabular Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,117 @@
+ENST00000332831 0.00 0 ENSG00000185097
+ENST00000335137 0.00 0 ENSG00000186092
+ENST00000416718 147.00 0.0955859613080923 ENSG00000198744
+ENST00000410691 0.00 0 ENSG00000222623
+ENST00000411249 0.00 0 ENSG00000223181
+ENST00000452176 0.00 0 ENSG00000223659
+ENST00000450305 0.00 0 ENSG00000223972
+ENST00000456328 0.00 0 ENSG00000223972
+ENST00000515242 0.00 0 ENSG00000223972
+ENST00000518655 0.00 0 ENSG00000223972
+ENST00000445840 0.00 0 ENSG00000224813
+ENST00000506640 3.85 0.00103471956451253 ENSG00000224956
+ENST00000457540 176.00 0.0565222831182978 ENSG00000225630
+ENST00000416931 31.00 0.031536864785323 ENSG00000225972
+ENST00000423562 0.00 2.91539788907381e-166 ENSG00000227232
+ENST00000438504 9.00 0.0016492259329919 ENSG00000227232
+ENST00000488147 0.00 1.8960160271408e-274 ENSG00000227232
+ENST00000538476 0.00 2.3616483533805e-287 ENSG00000227232
+ENST00000541675 0.00 1.45389696655984e-170 ENSG00000227232
+ENST00000428504 12.56 0.0031549934286337 ENSG00000228327
+ENST00000335577 14.65 0.0103141278598275 ENSG00000228463
+ENST00000424587 0.00 0 ENSG00000228463
+ENST00000442116 0.00 0 ENSG00000228463
+ENST00000448958 95.70 0.0162005197582888 ENSG00000228463
+ENST00000427426 232.00 0.118008913390241 ENSG00000229344
+ENST00000440782 0.00 0 ENSG00000229376
+ENST00000422528 0.00 0 ENSG00000229905
+ENST00000414688 0.00 0 ENSG00000230021
+ENST00000419394 0.00 0 ENSG00000230021
+ENST00000440200 0.00 0 ENSG00000230021
+ENST00000441245 51.65 0.0287269047554395 ENSG00000230021
+ENST00000447954 0.00 0 ENSG00000230021
+ENST00000448605 1.00 0.00102392418134165 ENSG00000230021
+ENST00000447500 0.00 0 ENSG00000230092
+ENST00000590817 0.00 0 ENSG00000230092
+ENST00000417636 0.00 0 ENSG00000231709
+ENST00000432723 1.80 0.000234465887962401 ENSG00000233653
+ENST00000442987 25.71 0.00216201488099237 ENSG00000233750
+ENST00000423796 0.00 0 ENSG00000235146
+ENST00000450696 0.00 0 ENSG00000235146
+ENST00000426406 0.00 0 ENSG00000235249
+ENST00000416385 154.04 0.0101246303660993 ENSG00000235373
+ENST00000412666 0.00 0 ENSG00000236601
+ENST00000450983 0.00 0 ENSG00000236601
+ENST00000458203 18.51 0.0180708915534142 ENSG00000236679
+ENST00000441866 0.00 0 ENSG00000236743
+ENST00000419160 0.00 0 ENSG00000237094
+ENST00000423728 1.88 0.00115930540044037 ENSG00000237094
+ENST00000425496 18.51 0.00224573461052847 ENSG00000237094
+ENST00000431321 0.00 0 ENSG00000237094
+ENST00000431812 0.00 0 ENSG00000237094
+ENST00000432964 0.00 1.37050756450295e-07 ENSG00000237094
+ENST00000440038 3.08 0.00142205894492893 ENSG00000237094
+ENST00000440163 0.00 0 ENSG00000237094
+ENST00000453935 44.28 0.0320323295763644 ENSG00000237094
+ENST00000455207 0.00 0 ENSG00000237094
+ENST00000455464 0.00 0 ENSG00000237094
+ENST00000599771 0.72 0.000571012719121621 ENSG00000237094
+ENST00000601486 0.32 0.000160091763520284 ENSG00000237094
+ENST00000601814 0.00 0 ENSG00000237094
+ENST00000358533 0.00 0 ENSG00000237491
+ENST00000412115 0.00 0 ENSG00000237491
+ENST00000429505 0.00 0 ENSG00000237491
+ENST00000434264 0.00 0 ENSG00000237491
+ENST00000443772 0.00 0 ENSG00000237491
+ENST00000457084 0.00 0 ENSG00000237491
+ENST00000585745 0.00 0 ENSG00000237491
+ENST00000585768 3.00 0.0028508085192879 ENSG00000237491
+ENST00000585826 0.00 0 ENSG00000237491
+ENST00000586288 0.00 0 ENSG00000237491
+ENST00000586928 0.00 0 ENSG00000237491
+ENST00000587126 0.00 0 ENSG00000237491
+ENST00000587530 0.00 0 ENSG00000237491
+ENST00000588951 0.00 0 ENSG00000237491
+ENST00000589531 0.00 0 ENSG00000237491
+ENST00000589899 0.00 0 ENSG00000237491
+ENST00000590848 0.00 0 ENSG00000237491
+ENST00000591440 0.00 0 ENSG00000237491
+ENST00000591702 0.00 0 ENSG00000237491
+ENST00000592547 0.00 0 ENSG00000237491
+ENST00000593022 0.00 0 ENSG00000237491
+ENST00000417324 0.00 0 ENSG00000237613
+ENST00000461467 4.88 0.0029164490537218 ENSG00000237613
+ENST00000423372 1.64 0.000199085235466067 ENSG00000237683
+ENST00000414273 483.00 0.102851490150648 ENSG00000237973
+ENST00000453576 0.00 0 ENSG00000238009
+ENST00000466430 9.01 0.0010582104286815 ENSG00000238009
+ENST00000471248 91.97 0.0511562927802144 ENSG00000238009
+ENST00000477740 0.00 0 ENSG00000238009
+ENST00000357876 44.28 0.0320323295763644 ENSG00000239664
+ENST00000440196 0.00 0 ENSG00000239664
+ENST00000493797 0.00 0 ENSG00000239906
+ENST00000495576 6.36 0.00159599541100089 ENSG00000239945
+ENST00000492842 0.00 0 ENSG00000240361
+ENST00000467115 47.00 0.102222941028288 ENSG00000240409
+ENST00000435300 0.00 0 ENSG00000240453
+ENST00000417659 0.00 0 ENSG00000240618
+ENST00000496488 0.00 0 ENSG00000241599
+ENST00000424429 0.32 0.000330057780272038 ENSG00000241670
+ENST00000450734 0.00 9.22180424920413e-61 ENSG00000241670
+ENST00000466557 9.24 0.00235213652714706 ENSG00000241860
+ENST00000484859 159.94 0.0105128565610323 ENSG00000241860
+ENST00000490997 0.00 0 ENSG00000241860
+ENST00000491962 8.98 0.0131063992905015 ENSG00000241860
+ENST00000469289 0.00 0 ENSG00000243485
+ENST00000473358 0.00 0 ENSG00000243485
+ENST00000607096 0.00 0 ENSG00000243485
+ENST00000514057 304.00 0.154882179236481 ENSG00000248527
+ENST00000514436 0.00 0 ENSG00000250575
+ENST00000540477 0.00 0 ENSG00000256186
+ENST00000594647 0.00 0 ENSG00000268020
+ENST00000606857 0.00 0 ENSG00000268020
+ENST00000438434 0.00 0 ENSG00000268663
+ENST00000494149 0.00 0 ENSG00000268903
+ENST00000437905 0.00 0 ENSG00000269732
+ENST00000599533 19.12 0.0899916575837746 ENSG00000269831
+ENST00000595919 0.00 0 ENSG00000269981
b
diff -r 000000000000 -r ca988deacfd1 test-data/rsem_sample.rsem_log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rsem_sample.rsem_log.txt Fri Feb 07 08:07:29 2014 -0500
b
b'@@ -0,0 +1,2333 @@\n+bowtie -q --phred33-quals -n 2 -e 99999999 -l 25 -p 1 -a -m 200 -S /panfs/roc/galaxy/NEXTGEN/database/files/007/dataset_7225_files/ref /panfs/roc/galaxy/NEXTGEN/database/files/007/dataset_7248.dat | gzip > rsem_output.sam.gz\n+\n+/website/galaxy-ng.msi.umn.edu/NEXTGEN/tool_dependencies/rsem/1.1.17/jjohnson/package_rsem_1_1_17/7d060ea51c6f/bin/rsem-parse-alignments /panfs/roc/galaxy/NEXTGEN/database/files/007/dataset_7225_files/ref rsem_output rsem_output s rsem_output.sam.gz -t 1 -tag XM\n+Done!\n+\n+/website/galaxy-ng.msi.umn.edu/NEXTGEN/tool_dependencies/rsem/1.1.17/jjohnson/package_rsem_1_1_17/7d060ea51c6f/bin/rsem-build-read-index 32 1 0 rsem_output.temp/rsem_output_alignable.fq\n+Build Index rsem_output.temp/rsem_output_alignable.fq is Done!\n+\n+/website/galaxy-ng.msi.umn.edu/NEXTGEN/tool_dependencies/rsem/1.1.17/jjohnson/package_rsem_1_1_17/7d060ea51c6f/bin/rsem-run-em /panfs/roc/galaxy/NEXTGEN/database/files/007/dataset_7225_files/ref 1 rsem_output rsem_output -p 1\n+Refs.loadRefs finished!\n+DAT 0 reads left!\n+Thread 0 : N = 2239, NHit = 6279\n+EM_init finished!\n+estimateFromReads, N1 finished.\n+ROUND = 1, SUM = 2239, bChange = 23.925859, totNum = 117\n+ROUND = 2, SUM = 2239, bChange = 5.697505, totNum = 49\n+ROUND = 3, SUM = 2239, bChange = 6.700577, totNum = 46\n+ROUND = 4, SUM = 2239, bChange = 0.939787, totNum = 41\n+ROUND = 5, SUM = 2239, bChange = 0.807837, totNum = 35\n+ROUND = 6, SUM = 2239, bChange = 0.810063, totNum = 32\n+ROUND = 7, SUM = 2239, bChange = 0.759904, totNum = 29\n+ROUND = 8, SUM = 2239, bChange = 0.759764, totNum = 28\n+ROUND = 9, SUM = 2239, bChange = 0.759686, totNum = 28\n+ROUND = 10, SUM = 2239, bChange = 0.530379, totNum = 23\n+ROUND = 11, SUM = 2239, bChange = 0.530535, totNum = 23\n+ROUND = 12, SUM = 2239, bChange = 0.530603, totNum = 23\n+ROUND = 13, SUM = 2239, bChange = 0.530637, totNum = 23\n+ROUND = 14, SUM = 2239, bChange = 0.530653, totNum = 22\n+ROUND = 15, SUM = 2239, bChange = 0.498065, 
totNum = 21\n+ROUND = 16, SUM = 2239, bChange = 0.308149, totNum = 21\n+ROUND = 17, SUM = 2239, bChange = 0.309653, totNum = 21\n+ROUND = 18, SUM = 2239, bChange = 0.310894, totNum = 19\n+ROUND = 19, SUM = 2239, bChange = 0.311931, totNum = 19\n+ROUND = 20, SUM = 2239, bChange = 0.312807, totNum = 18\n+ROUND = 21, SUM = 2239, bChange = 0.313555, totNum = 18\n+ROUND = 22, SUM = 2239, bChange = 0.314200, totNum = 17\n+ROUND = 23, SUM = 2239, bChange = 0.314761, totNum = 16\n+ROUND = 24, SUM = 2239, bChange = 0.315253, totNum = 16\n+ROUND = 25, SUM = 2239, bChange = 0.315689, totNum = 16\n+ROUND = 26, SUM = 2239, bChange = 0.316076, totNum = 16\n+ROUND = 27, SUM = 2239, bChange = 0.316423, totNum = 16\n+ROUND = 28, SUM = 2239, bChange = 0.316735, totNum = 15\n+ROUND = 29, SUM = 2239, bChange = 0.317017, totNum = 15\n+ROUND = 30, SUM = 2239, bChange = 0.317273, totNum = 15\n+ROUND = 31, SUM = 2239, bChange = 0.317506, totNum = 15\n+ROUND = 32, SUM = 2239, bChange = 0.317719, totNum = 15\n+ROUND = 33, SUM = 2239, bChange = 0.317915, totNum = 15\n+ROUND = 34, SUM = 2239, bChange = 0.318095, totNum = 13\n+ROUND = 35, SUM = 2239, bChange = 0.318261, totNum = 13\n+ROUND = 36, SUM = 2239, bChange = 0.318414, totNum = 13\n+ROUND = 37, SUM = 2239, bChange = 0.152793, totNum = 11\n+ROUND = 38, SUM = 2239, bChange = 0.152769, totNum = 11\n+ROUND = 39, SUM = 2239, bChange = 0.152749, totNum = 11\n+ROUND = 40, SUM = 2239, bChange = 0.152732, totNum = 10\n+ROUND = 41, SUM = 2239, bChange = 0.152717, totNum = 10\n+ROUND = 42, SUM = 2239, bChange = 0.152705, totNum = 10\n+ROUND = 43, SUM = 2239, bChange = 0.152694, totNum = 10\n+ROUND = 44, SUM = 2239, bChange = 0.152685, totNum = 9\n+ROUND = 45, SUM = 2239, bChange = 0.152677, totNum = 9\n+ROUND = 46, SUM = 2239, bChange = 0.152671, totNum = 9\n+ROUND = 47, SUM = 2239, bChange = 0.152665, totNum = 9\n+ROUND = 48, SUM = 2239, bChange = 0.152661, totNum = 9\n+ROUND = 49, SUM = 2239, bChange = 0.152657, totNum = 9\n+ROUND = 50, SUM = 
2239, bChange = 0.152653, totNum = 9\n+ROUND = 51, SUM = 2239, bChange '..b', totNum = 1\n+ROUND = 2248, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2249, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2250, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2251, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2252, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2253, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2254, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2255, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2256, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2257, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2258, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2259, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2260, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2261, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2262, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2263, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2264, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2265, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2266, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2267, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2268, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2269, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2270, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2271, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2272, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2273, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2274, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2275, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2276, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2277, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2278, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2279, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2280, SUM = 
2239, bChange = 0.003898, totNum = 1\n+ROUND = 2281, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2282, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2283, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2284, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2285, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2286, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2287, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2288, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2289, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2290, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2291, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2292, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2293, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2294, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2295, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2296, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2297, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2298, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2299, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2300, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2301, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2302, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2303, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2304, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2305, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2306, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2307, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2308, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2309, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2310, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2311, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2312, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2313, SUM = 2239, bChange = 0.003898, totNum = 1\n+ROUND = 2314, 
SUM = 2239, bChange = 0.000000, totNum = 0\n+Expression Results are written!\n+Time Used for EM.cpp : 0 h 00 m 00 s\n+\n+rm -rf rsem_output.temp\n+\n'
b
diff -r 000000000000 -r ca988deacfd1 test-data/test.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fq Fri Feb 07 08:07:29 2014 -0500
b
b"@@ -0,0 +1,8956 @@\n+@AMELIA:165:C03TUABXX:5:1101:7046:33695\n+CTGGGAATCCTAGGGGGCTCCATGACACCTTCCCCCCCAGACCCAGACTTGGGCTGTTGCTCTGACATGGACACAGCCAGGACAAGCTGCTCAGACCTGCT\n++\n+CCCFFFFFHHGHHGHJIJIHIIJI@FGIIIIIIIIIJIJFFHHDFFBCCECEDBDDDDCCDDCDDDDDDCACCADDBDDDB?ACDDDDDCDDDCACADCBD\n+@AMELIA:165:C03TUABXX:5:1101:20072:31903\n+TGGGAAAGGACCTGGGGCTGGTGAGGGGCCCGGAGGAGCCTTTGCCCGCGTGTCAGACTCCATCCCTCCTCTGCCGCCACCGCAGCAGCCACAGGCAGAGG\n++\n+CCCFFFFFHHHHHJJJJJJJJFHIJJJJJIJIIIIJIIJJHHHHHHFFFD@@=BCDDDDDDDDDDDDDDDDCCCDDDDDDDDDDDDDDDDDDDBDDDDBB@\n+@AMELIA:165:C03TUABXX:5:1101:17094:58336\n+GCTAGAGTCCATCCGCCAAGCTGGGGGCATCGGCAAGGCCAAGCTGCGCAGCGTGAAGGAGCGAAAGCTGGAGAAGAAGAAGCAGAAGGAGCAGGAGCAAG\n++\n+@C@FFFFDHHHHHJJIIIGGIJIIIJJIJEGIJGIJJIGJGGIHHHHFFDDDDBDBBCDBDDDBBDBDDDC?B?A?>CCCDDDDDDDDDBBDDCD@BDDCD\n+@AMELIA:165:C03TUABXX:5:1101:12097:28848\n+AGATTGATGTTCCATCCTACCTGCCTGACCTGCCCGGCATTGCCAACGACCTCATGTACATTGCCGACCTGGGCCCCGGCATTGCCCCCTCTGCCCCTGGC\n++\n+@?@DDB>DFA?FAGGGH<HHFHDG>CHEHIIIG>D@F@:@DHEEIGGG7AB<@CED4=DHCHHFFH=D3>3(((,,8809BBC>@@@0)99C::>888?BB\n+@AMELIA:165:C03TUABXX:5:1101:4793:38825\n+AAAGACCCATGTGATGCTGGGGGCAGAGACAGAGGAGAAGCTGTTTGATGCCCCCTTGTCCATCAGCAAGAGAGAGCAGCTGGAACAGCAGGTCCCAGAGA\n++\n+@CCDDFDDHHHFHGGGGFFIHGEGEEADHGEDHIGIGCEHGIGCFFHIJHIJJIHHHCEEDC?C@CCECDCBC@???ACBCCCBCCDCBCDDDDCACCDBD\n+@AMELIA:165:C03TUABXX:5:1101:13000:2187\n+GGACAAGGGGGCATCAAACAGCTTCTCCTCTGTCTCTGCCCCCAGCATCACATGGGTCTTTGTTACAGCACCAGCCAGGGGGTCCAGGAAGACATACTTCT\n++\n+CCCFFFFFHHHHGJJJJJJJJJJJJJJJIGJIIJJJJJJIJJIIIJIJJJJIJJHHHGFFFFEFCEEEECDDDBD@ABDDDDDDDDDDDDDDDDDCDDDDD\n+@AMELIA:165:C03TUABXX:5:1101:11402:2351\n+CTCTGTCAGCTCCTTGCTGCTCTTCAACACCACCGAGAACCTGTACAAGAAGTATGTCTTCCTGGACCCCCTGGCTGGTGCTGTAACAAAGACCCATGTGA\n++\n++=<DFFFFHHGGGGIGGEHHIIJIIIGHGHIGGGGBBGEHIIIIJJIJIHIGBBDHEGHIIDHDE@EHFEDFFDAC@>?CDDDCCDDDDBCDDDDBDDEDD\n+@AMELIA:165:C03TUABXX:5:1101:21256:22282\n+CCTTGATCTTCTCAATCTTGGCCTGGGCCAAGGAGACCTTCTCTCCAATGGCCTGCACCTGGCTCCGGCTCTGCTCTACCCGCTGGGAGATCCTGCTGAAG\n++\n+?=@A=DDDA<,ACGIEFDHHFFGG7+<8ABGGBE3?F19DB?DFD
AEI98BDF97CBFE>>6@EE@884='.(;@>ABA(58;B#################\n+@AMELIA:165:C03TUABXX:5:1101:19155:32547\n+GTGCCCTTCATCCAGCCAGACCTGCGGCGAGAGGAGGCCGTCCAGCAGATGGCGGATGCCCTGCAGTACCTGCAGAAGGTCTCTGGAGACATCTTCAGCAG\n++\n+??<DDDDBBBFHHGEBGHIIGHG@ECDFAHHGHI>@;@CE:?ECECCEC@CCCB?@-:B@CCBBCCCACC:4<CA@CC?9@>CCA:>?C@?CCCCDDCA@<\n+@AMELIA:165:C03TUABXX:5:1101:5175:49782\n+CTGGTCTTGAACTCCTGACCTCAGGTGATCCACCCGCCTTGGCCTTCCAAAGTGCTGGGATTACAGGCATGAGCCATTGCACCTGGCCAAAGTAGTGGAAC\n++\n+CCCFDFFFHHHHHJJJJGJJJJJJJHIJJJJJJJJJJJJJJJJJJJJJJJJJHIJJJJJIJJJJJJJHHHHFFFFFEEEEEEDDDDDDDDDDDDDDDDDDD\n+@AMELIA:165:C03TUABXX:5:1101:10498:50029\n+CTGGTCTTGAACTCCTCGCGTCAAGCAATCCTCCTGCCTTCACCTCCCAAAGTGCTGGGACGACGGGCATGAGCCACCATGCGTGGCATATGGCACAATCT\n++\n+@@@DBDDDHHDHHIGIIAFGHIGHGEGDDHIIIIIGIIIIIIIBFGGGIFGHCHGIIIIHGEBBADDD=CAA@ACAC?ACDDDDBBDCDDDDDDCDD?CCD\n+@AMELIA:165:C03TUABXX:5:1101:2198:8854\n+TGCCCAGGCTGGTCTTAAACTCCTGGACTCAAGTGATCCTCCTGGCTCAGCCTCCCAAAGTGCTAGGATTATAGGCATGCGCCACCACACCTGGTGGAGTT\n++\n+@@@DDFFFFHHFHGIIIJIIJGGHH>FIJFGCG@DFHGHIJ?@GEE>EHHEGGCDHGID>/=FADH@@@DE<CEEHEFFFD?C>B?=BBB?CCD:??<A>C\n+@AMELIA:165:C03TUABXX:5:1101:18944:7621\n+CACTTTGGGAGGCTGAGGCGGGTGGATCACCTGAGGTCAGGAGTTCAAAGCCAGCCTGGCCAAAATGGTGAAACCCCATCTCTACTAAAAATAGAAAAATG\n++\n+B@CFFFFFHHHHHJJJIJJJJJ;AHHGEFHHGFDEF@CEEE@B?CCACD@CDC<?B?ABBDCDCDCCD:ACDDCDDDDDDDDDDDEDDCDDDDCCCDDDAC\n+@AMELIA:165:C03TUABXX:5:1101:5915:23673\n+CAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCTGCCTCAGCCTCCCAAAGTGTTGGGATTACAGGCATGAGCTACCGCACCCAGCCAAATTCTTTTA\n++\n+BCCFFFFFHHGHHJJJIJJHIJJJJJJGIJJJIIIJJIIIIJIJIJIIJJJJJJEFFHIJJJJIJJHHHHHFFFFDCEEEDDDDDDDDDDDDDDDDEEDDD\n+@AMELIA:165:C03TUABXX:5:1101:8218:36310\n+GTTTCACCATGTTAGCCAGGATGGTCTCTATCTCCTGACCTCGTGATCCACCGGCCTCGGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGTGCC\n++\n+?@@D?DD?ACC<CG<GAFF9G:AFAEBFHH?HEIFFBBFDFF1?0BFA?FBBFBA;A854CF<A?E>;BDDDADA3;@BA55;;??;,8@###########\n+@AMELIA:165:C03TUABXX:5:1101:4159:42467\n+GTTTCACCATGTTAGCCAGGATGGTTTTGATCTCCTGACCTCGTGATCCACCCACCTTGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACGGCACC\n++\n+=??ADDDABCDF
FEGFF<DE+?<13ACA;8E4EFICFII;BF;:BFDBF*9B@D@FAFGBEE>DA=>EAF?CD;>B="..b'165:C03TUABXX:5:1101:15704:16750\n+CTCGAACTCATGACCTCAAGTGATCCGCCCACTTCGGTCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACGGCGCCTGGCCCCCAAATGCTCTTGAACC\n++\n+C@CFFFFFHHHHGJGGGIIJJJIJJJJJIJIIIJIIJDHIIJIIIIJFHIGJIEFHHC>@HIEEEEAHBDCEBCB?BDDDBDBBDDB?<CACCCDDDDDD<\n+@AMELIA:165:C03TUABXX:5:1101:9597:17658\n+CTGGTCTCGAACTCCTTACCTCAAGTGATCCGCCCACCTCAGCCTCCCAGACTGCTGGGATTACAGGCATGAGCCACCTTGCCCGGCTCACCCGGTCGCCT\n++\n+CCCFDEFFHHHHHJJJJJJJJJJJJHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJIHHHHFFFFFEEEEEDDDDDDDDDDDDDDDDDDDDDD>\n+@AMELIA:165:C03TUABXX:5:1101:4379:20102\n+CCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCATGCCTGGCCAACAAAATCCTTTAAAATGGACCAAAGAAGATCGGAAGAGCACAC\n++\n+@C@DFFFFHGHHGIIJJJJICHHJJJJJJJJJJJJ@FGGGHIIJJJJJIJIJJJIJIIJJHHHGFFFFFFFDEEEEEDDDDBDDDDDDDDDBDDDDDDDDD\n+@AMELIA:165:C03TUABXX:5:1101:15209:22423\n+CTGATCTCAAGTAATCCACCTGCCTCAGCCTTCCAAAGCGTTGGGATTACGGGTGTGAGCCACTGCGCCCGGGCCAGCATTACTTCTGAGCTCTGCTCCCT\n++\n+CCCFFFFFHGHHHIIJIJJJJJJJJJJIIJJJJJIJJJJIHJIJJJIJIHHGJFHHIJIIGGGHHFFBCD?BDDD?BDDDDCDDDDDDDDDDDDDDCDDDD\n+@AMELIA:165:C03TUABXX:5:1101:16900:24280\n+CTGGTCTCGAACTCCTGACCTCAAGTGATCCGCCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCATGCCCGGTCTTAAAACCAGTTT\n++\n+@@@FDDFFHDHHHIJJJIIJIJJEIBHCHIHAFHFGGH@DEEEBBGHIFGGIHIJHIHIGDHHHHHF7@?DCCDECED@CCCCCDD;=BCDEACCBDDAC:\n+@AMELIA:165:C03TUABXX:5:1101:6657:30654\n+CTGGTCTTGAACTCCTGACCTCAGGTGATCTGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCGCGCCTGGCCAGTTTCCCCTATT\n++\n+@C@DADDFHHHHHJJJIGIIJJIIIAFHIIIJJIIIIIJIGHGGGIJJGGIIBFGBGIGECHJIIIIHFEEEFFFDCBDDDBDDDBBDDDADDEDCDDDDD\n+@AMELIA:165:C03TUABXX:5:1101:5086:37233\n+CTTGAACTCCTGACCTCAGGTGACCCGCCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGTGTAAGCCACCGCACTCAGCCATGCCTGCTGTTTCTCAA\n++\n+@CCFFFFDFFHHGHFHHIIE<CBFHIGJIHHIHAFBGGBGHGHIJIIDDECDAFAFCGHGIJ7C?EEEHEFBEDDCDDDCDDDD@ACCDDD@C4>>CCCCD\n+@AMELIA:165:C03TUABXX:5:1101:11272:42211\n+CTGGTCTTGAACTCCTGGCCTCAAGCAATCCACCCACCCTGGCCTCCCAAAATGTTGGGATTATAAGCATGCACCACAGTGCCCAGACAGATATGTACACT\n++\n+@CCFDD
FFHHHHHJJJJJGIJJJJJJIJIJJIIJIIJIGIIJJJJIJJIJJJJIFHIIIGIEHHHHHGHEFFFFFEDDDCDDDDCDDDDDDDCDDCDCDDD\n+@AMELIA:165:C03TUABXX:5:1101:8065:47791\n+CTCGAACTCCTGACCTCAAGTGATCCACCTGCCTCTGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCGTGCCCGGCTTATTTCTTGGCTCACTTA\n++\n+CCCFFFFFHHHHHGIJJJJJHIJJJJJJJJJJJIJIJJJIJJJJJIJFHIJJJJIJJJJJIJCHCHHJIJHHHHFBDFDDDDDDDDEEEEDDDDDDDDDDD\n+@AMELIA:165:C03TUABXX:5:1101:12377:49621\n+CTGACCTCAGGTGATCCACCTGCCTCGGCCTCCCAAAGTGCAAGGATTACAGGCATGAGCCACTGTGCCCAGCCCCTAATTTGTGTATTTTTAGTAGAGGC\n++\n+@C@DDEDFGHGHDHEFHGIIHIIIIIIIIGGIIIIGIIDDGIDIIGGGIIIIIIIIGIGHIAG@ECCHHHGFFFFBDAACDDD>AA>CDDDDDDDEEDDDB\n+@AMELIA:165:C03TUABXX:5:1101:5175:49782\n+CTGGTCTTGAACTCCTGACCTCAGGTGATCCACCCGCCTTGGCCTTCCAAAGTGCTGGGATTACAGGCATGAGCCATTGCACCTGGCCAAAGTAGTGGAAC\n++\n+CCCFDFFFHHHHHJJJJGJJJJJJJHIJJJJJJJJJJJJJJJJJJJJJJJJJHIJJJJJIJJJJJJJHHHHFFFFFEEEEEEDDDDDDDDDDDDDDDDDDD\n+@AMELIA:165:C03TUABXX:5:1101:19721:51721\n+GGTCTCGAACTCCTTACCTCAAGTGATCTACCTGCCTCAGCCTCCCAAAGTTTTGGGATTACAGGTGTGAGCCACAGTGCGAGGCTTGATGATTAAATGAC\n++\n+CCBFFFFFHHGHHJJJJGIIJJJIJJJJJJJJJJJI@GIIJJIJJJIJJIIJIIIJJIJIJIIIJ=D@DEHHFFHFFCCDDADDBBDDACCACDEDCDDDC\n+@AMELIA:165:C03TUABXX:5:1101:12815:55905\n+TCGAACTCCTGACCTCAAGTGATCCACCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGCGAGCCACTGCACACAGGCAAAAGCCCCTTGCTTCTCA\n++\n+@@@FFFDFHHHDHJIHHEHEHHIIJJJIGGGGGIIIEIGIIJJIJIDDDGEBFFGIGIGIIJHGEDDCDCDDCDDCDD@DDDDDDDCDDDDDBDDDCDDDC\n+@AMELIA:165:C03TUABXX:5:1101:19331:56948\n+CTCGAACTCCTGACCTCATGTGATCCACCTGCCTTGGCCTCCCAAAGTGTTGGGATTACAGGCATAAGCCACCACACCCAGCCAGAACTATTACTTCTTTT\n++\n+@C@DDDFFHHHBFGGEHDHEHIIIIGHHGIGIBFHGCHIIGGDHHCG9?FGDHB=CFFGHIIIGEHIGAEEHHFFDE?AABB=;=<>CA@CD>CCCCCCCC\n+@AMELIA:165:C03TUABXX:5:1101:3230:48975\n+CGGGTATGGTGGCTCACACCTGTAATCCCATCACTTTAGGAGGCCAAGGTGGACAGATCGCTTGACTCCAGGAGTTCAAGACCAGCCTGGGCAACACGGCG\n++\n+?@@DD@DDFAFBBGIIIIIIIIDHHIIIIIIIAHEIIIIIBDEEIIIIIB?DHHGCGGEEGGHGBCCCCEBDDE;@@DECCCCCCBCCA?BAA?@BBB>@<\n+@AMELIA:165:C03TUABXX:5:1101:19905:9501\n+GGGCATGGTGGCTCATTCCTGTAATCCCAGCACATTGGGAGGCTGAGGCAGGAGGATCGCTTGAGGCTAGGAGTTCAA
GACCAACCTGGGAGATCGGAAGA\n++\n+CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHFFFFEEECE;ACDEDDDDDDDDDDDDDDDDDDDDDDB\n'
b
diff -r 000000000000 -r ca988deacfd1 test-data/trinity_assembled_transcripts.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trinity_assembled_transcripts.fasta Fri Feb 07 08:07:29 2014 -0500
[
b'@@ -0,0 +1,2909 @@\n+>comp6_c0_seq1 len=235 path=[2595:0-234]\n+CGGCCAGGGGCTTGTGGAGGTTAAAGTTTCTGTACAGTCCGCTTCCAGTCTCGTGTATCT\n+TCTCTGGAGTGTTGTTTTGCAGTGCGGGGTGAGCTGGTGTGGGTGTTTGAATGCTGCTGT\n+TTGCTGTTTTTTAAAGGCCTTCTAGCCCTCCCATGGGGTTGCTGCAGCAGGAATTCTTAC\n+CTGTGCTTCAGCCCAGCATACAGACTGCTGACAGGCACCACACAGTCACTCGAGG\n+>comp12_c0_seq1 len=239 path=[4973:0-238]\n+GGGTCATGGCCCGCACGGTCTCTGAGAGAGAGAAAGAGAGAAGGGAGACCACGTTTGGGA\n+TAGCTGGGGCTCAGGGCCAAGCCTGGGGATAGTCTGCAGATGCAGGTGGGGTGCCAGGAG\n+GAGGGAGCAGGCTGCAAAGGCGTCTTGCAAGCAGTGTACTTCCTGGCTCCCGGAGTTCGG\n+TGTGGGTGTCGGGAAGGGGAGATTCCCCGAGGAAAGAAAATGGACTTGAAGACAAGTCC\n+>comp17_c0_seq1 len=215 path=[7123:0-214]\n+AAGCTGGCCTCAAATTCCTGATCCTTCTGCCTCGTTTCCCCAGTGCTAGGATTATATGTG\n+TGCCACCATGATTTCTTTCTTTAGTTTTGAACATAGTCAGGCTGTCCTGGAACTCACAGA\n+TATATATGCTTGCCTCTGCCTCTCAAGCACTGCGGTTAAAGTATGGTCCTTTGAAGTCGC\n+TGCTCTAAGTGCCAATGAAAAGAAAAGCTAGACAG\n+>comp18_c0_seq1 len=329 path=[7445:0-328]\n+TTTTCTTATATCGCCTACATATTTTCCCAAATCAGCATTACCAAGCTGCTGCCCTAGATG\n+CCTTGGGTCAGACCTGTGCTGTCCGTTGTCTCCGCTGCAGCTAGCCTCTGAGAGAGAGGA\n+GTTGGCAGAGATTAGGGTATAATGCCATTAAAATGTGTAGTCGGAATGTAGCTCTGGAGC\n+TAGGCGATTACTGTTCCCATGTTTCTGTTTCCCCTTTAAACACTCTTAAAACTCGTACAA\n+AAGCACCTCATTGTCCCAATGTTCAGTATAAGCTGGCAGATAACCCTTGCTGGGGTATGG\n+GATCTCCGAGGTCCTTAGCCTCTAGGGTG\n+>comp21_c0_seq1 len=307 path=[8941:0-306]\n+GTCCTGCGTTTAGTAGCTGAGAGGTAGAGGGTGTTTTACAAAGTCCCATAAACAACTCTT\n+CTCTGTAGGAGTAAGTCAGTAAGTTAAAAATATTGCTTTATCTTGATTGAGTATAAACCA\n+CATTAAATTAAAAGTAAAATTAAAGTAAGCCACTTTACCATTCAAAGAAACAACCTAATA\n+ACTAATTTACAAACCTTTCCCCACGAGGACAAAAGACTTCTAATGGTTTAACAATACTGA\n+GAAAGGGTATTACACAAATAATTATAATAAACAACACAGTCCCTATTCTCTCCAAGCTAA\n+ATAATTT\n+>comp23_c0_seq1 len=253 path=[9973:0-252]\n+GTTAGGGCCAGTTTGAACACAATGACCTTTGGGTGACACTATTCAAATTATGACAGTTGA\n+TTTAACAAGAAAATCCTTTATACCTAAGACCTATGCCTCCATAAGTTAAGACCCAGAACA\n+TACACTAGAGTGACCTTTGGAAGAAATAAGAATGCCTGTTCTTTTTAGAAAATATCTGTA\n+AAACAACCTAGTCTTTGGCAGCACCTAGAAAACATAATTCTAGTTGCTCTGTGTGTGTGT\n+GTGTGTGTGTGTG\n+>comp26_c0_seq1 len=204 
path=[12154:0-203]\n+CGGTATTCCTCAGGTGAGAATTCTTTGTTTAGCTCTGAGCCCCATTTTTTAATGGGGTTA\n+TTTGATTTTCTGGAGTCCACCACCTTCTTGAGTTCTTTATATATATTGGATATTAGTCCC\n+CTGTCTGATTTAGGATAGGCAAAGATCCTTTCCAAATCTGTTGGTGGCCTTTTTGTCTTA\n+TTGATGGTGTCTGTTGCCTTTCAG\n+>comp26_c1_seq1 len=220 path=[11776:0-219]\n+AGGCAGTCTTAGCAAAGTATGATGGTCAAATCTAAGTGGATCAAGGAACTCCACATAAAA\n+CCAGAGACACTGAAACTTATAGAGGAGAAAGTGGGGAAAAGCCTCGAAGATATGGGCACA\n+GGGGAAAAAGACCTGAATAGAAAAGCAATGCCTTGTGCTGTAAGATCGAGAATCGACAAA\n+TGAGACCTCATAAAATTGCAAAGCTTCTGTAAGGCAACAG\n+>comp30_c0_seq1 len=592 path=[14943:0-217 14374:218-591]\n+CTCTGTAACTCCCGACAAGAGCCACGGACGGACCTGGGGCTGCCAGTGATTGGAAGGCCT\n+AGGCACTTCACAGGCCTCTTTGCTCTGTAGCTTCTTCTTTATAGGTGTGGACAGGGAGGA\n+GGGTGCGGAGGGCTCTTAAGAAGGAAGGTGGCCTGGGTATGAGAGATGCACATCTCAGCT\n+GGGCTTCTGTAAGGAAATACATGAGGCTATCCTGGGATATCTCAAAGGGGGCCTTCCTCA\n+GTCACCCATTCGGGATATAAGGAAGAAAGAATAGGTCATTACACACAGGGGGACTGGAGC\n+GGAGTGTCAGCCTTCATATAACTTCAGTGGGAAGAGCAGGTGTAGTATAGCGAGGAAGAC\n+CACTGTATCTTGGCATGCAGTGGGGGAAAGGAGGAATTATAGACCTCAGAGTGACGCTTC\n+TGGTCTCAGTCAAGTTAATGCTCCCCAGAAACAGAAAGCACACAACCAAAGCTGGTCTTT\n+ACTTTGAAGTTTCGTGATGACATTGTCTTCATGCTGCACTCTGGTTGAGCCCCCTCCTGC\n+TCCCTCTGGTGCCCACACATTAGGGTGTGGGACTGTACAGGGTACATACAAC\n+>comp32_c0_seq1 len=3739 path=[19885:0-394 23839:395-436 
20280:437-3738]\n+GGAGCTGGAGGCCCCCAGGCAACTACACCGTCCACGTACCCAGAGGGGCTGGGCCCTCCC\n+ACCAGAGACCACGCCCTGGTGTGCCTTAGGGGCCCTGGTTTGTTAGTCTCTGAGTGTGCA\n+GTTGCTGCACATGGGGCCCTGGCGCTTGCTGCACCAACTTCCTGTTGGGCCCGTGGTCCT\n+TGGAGGCATGCAGTTCAGCAGACAGTGACTCAGCCATCCACCCAACATGCGGAACGTGTC\n+TCTTCTGCAGGTCCCGGTCCACAGCAGGATTCCCCCTCTGTGAAAAGGCACGCTGATCTG\n+TCTGGATAAGTGTGGCCGGCCCCATGTATCCGGAATCAACCACGGGGTCCCCAGCTCGAC\n+TCTCCCTGCGGCAGACAGGCTCCCCCGGGATGATCTACAGTACTCGTTATGGGAGTCCCA\n+AAAGACAGCTCCAGTTTTACAGGAATCTGGGCAAATCTGGCCTTCGGGTCTCCTGCCTGG\n+GGCTTGGAACATGGGTGACCTTCGGGGGCCAGATCACGGATGAGATGGCAGAGCACCTAA\n+TGACCTTGGCCTACGATAATGGCATCAACCTGTTCGATACGGCGGAGGTCTACGCTGCTG\n+GAAAAGCTGAAGTGGTATTAGGGAACATCATTAAGAAGAAGGGATGGAGACGGTCCAGCC\n+TTGTCATCACCACCAAGATCTTCTGGGGTGGAAAAGCGGAGACTGAGAGAGGCCTTTCCA\n+GGAAGCACATAATTGAAGGACTGAAAGCGTCCCTGGAGCGGCTGCAGCTGGAGTACGTGG\n+ATGTGGTT'..b'GATGCTGCACACTTCTGCTACCTGATGGCCCA\n+GGTTGGATTTGGGGTTTATACAAAGAAAACTACAAAACTTGTCTTGATAGGCTCAAATCA\n+CAGTTTGCCCTTTTTAAAGTTTGCCACCAATGAAGCCATTCAAAGGACGGAGGCTTATGA\n+GTATGCTCAGTCCCTTGGGGCACATACCTGTTCCTTACCTAACTTCCAGGTGTTTAAATT\n+CATCTACTTGTGCCGCCTGGCTGAAATGGGACTTGCCACACAGGCCTTCCACTACTGCGA\n+AGTAATAGCCAAGAGTGTCCTGACACAGCCTGGTGCATACTCTCCAGTACTGATTAGCCA\n+GTTGACTCAGATGGCTTCCCAGTTACGCCTCTTCGATCCTCAACTGAAGGAGAAGCCAGA\n+AGAGGAGTCCTTTGTGGAGCCTGCCTGGTTGGTGCAGCTGCAGCACGTGGAGAGGCAGAT\n+CCAGGAGGGCACTGTGCTGTGGAGCCAGGATGGAACTGAACCCCAGCAGTGTCGCATTAC\n+ATCGGGCTCTGAGGTGGAGCAGTCAGATGGCCCTGGACTCAACCAGCAGGCAGGGCCACA\n+GGCCGACAACCCTCTACTGATGCCAAGCACTGAGCCTTTGATGCATGGCGTGCAACTGCT\n+GCCCACAGCTCCTCAGACATTGCCTGATGGCCAGCCTGCTCACCTTTCCAGGGTGCCGAT\n+GTTCCCAGTGCCAATGTCTCGTGGGCCCCTGGAGCTGAGTCCTGCCTATGGACCCCCAGG\n+ATCTGCACTTGGCTTCCCAGAATCCTCCAGATCTGATCCTGCAGTGCTGCATCCTGGGCA\n+GGCCCTGCCACCCACTACACTAAGTCTCCAGGAAAGTGGGCTTCCACCCCAGGAGGCCAA\n+AAGTCCAGACCCAGAAATGGTGCCACGGGGCTCACCTGTCAGACACTCCCCTCCGGAGCT\n+CAGTCAAGAGGAGTTTGGCGAGAGCTTCGCTGACCCGGGCTCTTCCAGAACAGCACAGGA\n+CTTGGAAACCTCCCCAGTGTGGGATCTTGGCAGCTCCAGTTTGACACGTGCACCATCTTT\n+GACATCTGATTCTGAAGGGAAGAAACCTGCACAGGCTGTCAAAA
AGGAGCCCAAGGAGCC\n+CAAGAAGACTGAGTCCTGGTTCTCTCGTTGGCTGCCTGGAAAAAAAAGGACAGAAGCTTA\n+TCTACCAGATGACAAGAACAAATCAATTGTTTGGGATGAAAAGAAAAACCAGTGGGTGAA\n+TTTGAATGAACCAGAGGAGGAGAAGAAGGCTCCACCCCCACCTCCAACATCGTTCCCCAG\n+GGTTCCCCAGGTGGCTCCCACTGGGCCTGCAGGACCACCCACGGCCTCCGTGAATGTGTT\n+TTCTAGAAAAGCAGGTGGGTCCAGAGCTCGCTATGTGGATGTTCTAAACCCAAGTGGAAC\n+ACAGCGGAGTGAACCAGCTCTTGCTCCTGCAGATTTCTTTGCTCCTCTCGCCCCACTGCC\n+AATTCCTTCTAACTTATTTGTACCAAACCCAGATGCAGAAGAGCCACAACCTGCAGATGG\n+GACTGGCTGTAGGGGACAGGCACCAGCTGGGACTCAGTCTAAGGCAGAGTCCACCCTGGA\n+ACCCAAGGTGGGAAGTTCTACAGTATCAGCCCCTGGACCTGAGCTCTTACCCTCCAAACC\n+AGATGGCTCCCAAGGAGGAGAGCTCTCGCGTTGTAGCTCTCTGAGCTCGCTCTCACAGGA\n+AGTGAGCCGGCATTTCCATC\n+>comp51_c2_seq5 len=1755 path=[154406:0-236 150319:237-958 151042:959-1754]\n+ACAACCCTGCTCAGTTGGTGCAGAATTGGCAGAACAAAAACAAATCATGTCTGCAGTGCT\n+GTGGGCTAGGAAATGCTGAGCAGCAGGAACTTGGCACATACAGAAGGTCTTGCTTACACT\n+TGAAGCTAACACAGAGTCTGCCCATTTGTTATTGATCTGTGGGGTTATCCTGTAGCTCCA\n+TCGCTGGACAGCCTCTGGGCATGGACACATGTAGGTCATTGCTGTTCCTTTTGTTCTAGG\n+CTTCTGTCACCTCGGGAAATTCAAGGCCAGGGAGGATTGGCCAAAGAAAATATGCGGCAT\n+TGAACTAGGATTGCTGCTGGATTCTTATTTGGAACCCTGAGATGATTAGATAACTGTTCT\n+TCACCAAGAACCAACTAGCTGCCTCAGGTCTCGTGGATCCCAGATGGACATGGCAGTAAC\n+TCAGAGCCGGTCATACTGTGTGTCTCCTTCCTTGCAGCCTATGTATATGAATGATTGAAA\n+AATTGATTTCAAGGTTCCTTTCAAATGCTGGATAGAATGAATCTAATTCAAAGTCTTTTA\n+TGTAAAAGAACAATGTTTCTTTTTTAGTCACTTTAGACAAATTAAGGTTTGAGCCACTTG\n+AGAATATCCCCGCAGAGAGGCTAGGCAGGGATTCACTACAGGAGAGCACAGTTACTTGGA\n+CACCAAGGCCTTTCTCACCTGAGAAATGGTGCAAACCAACCAGGGATCCTTAGGCTACTG\n+AGAAGGCTGTCTGCTTACCTGGAGAGGACTGACTGCTGGGATGTCTGGCCTAAGAGGAGC\n+CTCAAACTGCACTGGGCAACTGTGCCTTGTCCTGTGAACTGGGGAGTACATGCCAGCTCC\n+TCAAAGGGTCACCAGATGGCAGCGGCTCCCAGGCTGGGGAGGAAGTGGTTACAAGGGCAT\n+GTCTCTCCAACATTTCCAGCCTCACTCCATTTTCTGTGGCCTCATTAGTGCTGGGTGCGT\n+TTTTTTTTGCTCCGAGCTTTCCGTCATGGCTCAGTCACTGGGGAGAGCCCCAGGGAGCTG\n+ATATTCTGAAGCTGCTGATCTGATTAACCAGGTCAAAAACTGGCTGTGTGCTGCAACGTA\n+CTGCCCTCCTAGCCAGGAGTCTGTATGGAGCCCTCTAGCTGACTCAATGTCACCTGCAGA\n+CAGCACTTAAAGCCTTGGTGGTATCTTGGAGTGGCTTACGGTGTCCTAGCGCA
CACTCTG\n+GAACCTTGCATCTCTACCCCAAAGCCCACCAGGGACCTTCAGCAGCGACTCACTGGCTCA\n+CCCATATCACTTCAAGGAGCTGCCACCACATAAGAGTCAAAGCTTTAAGGCTTCACTGAC\n+TGTTCCTAAAACTATCCTTACACTCTAAAGAGGAAGGGAAAGAGAGTTAAATATTTAAAA\n+ATCAGCAGGTAGAAAACATAAATGGTTTGTTTGGTATTAAATTTAGAAATTGTAGAACAT\n+GGAATTAGTACAGCCCTTCCCGGTTTTCTGGTTCCTTGCATCATCTAGGAAAGACTCACA\n+GAATGGCTTCCAGGACGTTCCATAGATGAGTGTACTTGGACCCTCCTGTTCCTGTCATTA\n+AGAAAGGCCTGTCCCTGTGGGTCAGTAGCAACTTGTGTCCTTGCTGGGACCAGGCAGGAC\n+CCTTGCCTCTGGTTCAACCCTGCCCAGGCTCTGTGTTGACTGCCAACCACTTTGGATTCA\n+AGAAGCTTCCTCCCTGGGAGCTGCTATTTTTCCAAAATTCACATTGTTAAATAAAATATT\n+TTGGGCTCTCAAGTA\n+>comp52_c0_seq1 len=261 path=[157215:0-260]\n+GGGGCATGTGTGTTAGAATACGTGTAGAAGTCAGAGGGTAACTTGTAGGAGTTGGTTCTC\n+TCCCTCTCCCATGTGAGAATGACCTTGGTAGCAAACACCCTTAACAAATGACCCATCTCT\n+CTGTCCTCAGCCACAATGATTTTATAACACTTTCTCTGTAAGAATTCAATTTAAGAATAA\n+TTTCCTTCCCTTTATTACCTTCCTACTTCAGGCTAAGAACAAGGAACCAAAGTATCTACT\n+CAACAAACTCAGAGAAAGACA\n'
b
diff -r 000000000000 -r ca988deacfd1 test-data/trinity_gene_map.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trinity_gene_map.tabular Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,84 @@
+comp6_c0 comp6_c0_seq1
+comp12_c0 comp12_c0_seq1
+comp17_c0 comp17_c0_seq1
+comp18_c0 comp18_c0_seq1
+comp21_c0 comp21_c0_seq1
+comp23_c0 comp23_c0_seq1
+comp26_c0 comp26_c0_seq1
+comp26_c1 comp26_c1_seq1
+comp30_c0 comp30_c0_seq1
+comp32_c0 comp32_c0_seq1
+comp32_c0 comp32_c0_seq2
+comp33_c0 comp33_c0_seq1
+comp35_c0 comp35_c0_seq1
+comp36_c0 comp36_c0_seq1
+comp36_c1 comp36_c1_seq1
+comp37_c0 comp37_c0_seq1
+comp38_c0 comp38_c0_seq1
+comp40_c0 comp40_c0_seq1
+comp40_c1 comp40_c1_seq1
+comp41_c0 comp41_c0_seq1
+comp41_c1 comp41_c1_seq1
+comp42_c0 comp42_c0_seq1
+comp43_c0 comp43_c0_seq1
+comp44_c0 comp44_c0_seq1
+comp44_c0 comp44_c0_seq2
+comp45_c0 comp45_c0_seq1
+comp45_c1 comp45_c1_seq1
+comp45_c1 comp45_c1_seq2
+comp45_c1 comp45_c1_seq3
+comp45_c1 comp45_c1_seq4
+comp45_c1 comp45_c1_seq5
+comp45_c1 comp45_c1_seq6
+comp45_c1 comp45_c1_seq7
+comp45_c1 comp45_c1_seq8
+comp45_c2 comp45_c2_seq1
+comp45_c3 comp45_c3_seq1
+comp46_c0 comp46_c0_seq1
+comp46_c0 comp46_c0_seq2
+comp46_c1 comp46_c1_seq1
+comp46_c1 comp46_c1_seq2
+comp46_c1 comp46_c1_seq3
+comp47_c0 comp47_c0_seq1
+comp47_c0 comp47_c0_seq2
+comp47_c0 comp47_c0_seq3
+comp47_c0 comp47_c0_seq4
+comp47_c0 comp47_c0_seq5
+comp47_c0 comp47_c0_seq6
+comp47_c0 comp47_c0_seq7
+comp47_c2 comp47_c2_seq1
+comp48_c0 comp48_c0_seq1
+comp48_c0 comp48_c0_seq2
+comp49_c0 comp49_c0_seq1
+comp49_c1 comp49_c1_seq1
+comp49_c2 comp49_c2_seq1
+comp49_c3 comp49_c3_seq1
+comp49_c4 comp49_c4_seq1
+comp49_c5 comp49_c5_seq1
+comp49_c6 comp49_c6_seq1
+comp49_c6 comp49_c6_seq2
+comp49_c6 comp49_c6_seq3
+comp49_c6 comp49_c6_seq4
+comp50_c0 comp50_c0_seq1
+comp50_c1 comp50_c1_seq1
+comp50_c1 comp50_c1_seq2
+comp50_c1 comp50_c1_seq3
+comp50_c1 comp50_c1_seq4
+comp50_c1 comp50_c1_seq5
+comp50_c1 comp50_c1_seq6
+comp50_c1 comp50_c1_seq7
+comp50_c1 comp50_c1_seq8
+comp50_c1 comp50_c1_seq9
+comp50_c1 comp50_c1_seq10
+comp50_c1 comp50_c1_seq11
+comp50_c1 comp50_c1_seq12
+comp50_c2 comp50_c2_seq1
+comp50_c3 comp50_c3_seq1
+comp51_c0 comp51_c0_seq1
+comp51_c1 comp51_c1_seq1
+comp51_c2 comp51_c2_seq1
+comp51_c2 comp51_c2_seq2
+comp51_c2 comp51_c2_seq3
+comp51_c2 comp51_c2_seq4
+comp51_c2 comp51_c2_seq5
+comp52_c0 comp52_c0_seq1
b
diff -r 000000000000 -r ca988deacfd1 tool-data/rsem_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rsem_indices.loc.sample Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,14 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of RSEM reference data files. You will
+#need to create these data files and then create an rsem_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rsem_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had the hg18 reference files stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the rsem_indices.loc entry would look like this:
+#
+#hg18   hg18   hg18   /depot/data2/galaxy/bowtie/hg18/hg18
b
diff -r 000000000000 -r ca988deacfd1 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,8 @@
+<tables>
+    <!-- Locations of all rsem reference files under genome directory -->
+    <table name="rsem_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/rsem_indices.loc" />
+    </table>
+</tables>
+
b
diff -r 000000000000 -r ca988deacfd1 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Fri Feb 07 08:07:29 2014 -0500
b
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="rsem" version="1.1.17">
+        <repository changeset_revision="cd7f70d4c687" name="package_rsem_1_1_17" owner="jjohnson" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="samtools" version="0.1.19">
+        <repository changeset_revision="00e17a794a2e" name="package_samtools_0_1_19" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bowtie" version="1.0.0">
+        <repository changeset_revision="9fcaaedbbfd6" name="package_bowtie_1_0_0" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>