Repository 'deg_annotate'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/deg_annotate

Changeset 0:b42373cddb77 (2018-11-23)
Next changeset 1:e98d4ab5b5bc (2019-01-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit bc766aeec78fe74a1d70d608d2f73ba3f2a3e047
added:
deg_annotate.py
deg_annotate.xml
test-data/annotation.gtf
test-data/deseq2_output.tabular
test-data/dexseq_output.tabular
b
diff -r 000000000000 -r b42373cddb77 deg_annotate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deg_annotate.py Fri Nov 23 01:59:47 2018 -0500
[
b'@@ -0,0 +1,178 @@\n+import argparse\n+import os\n+from collections import defaultdict\n+\n+from BCBio import GFF\n+\n+\n+def strandardize(strand):\n+    if str(strand) == \'-1\':\n+        strand = \'-\'\n+    elif str(strand) == \'1\':\n+        strand = \'+\'\n+    return strand\n+\n+\n+def gff_to_dict(f_gff, feat_type, idattr, txattr, attributes, input_type):\n+    """\n+    It reads only exonic features because not all GFF files contain gene and trascript features. From the exonic\n+    features it extracts gene names, biotypes, start and end positions. If any of these attributes do not exit\n+    then they are set to NA.\n+    """\n+    annotation = defaultdict(lambda: defaultdict(lambda: \'NA\'))\n+    exon_pos = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))\n+    tx_info = defaultdict(lambda: defaultdict(str))\n+\n+    with open(f_gff) as gff_handle:\n+        for rec in GFF.parse(gff_handle, limit_info=dict(gff_type=[feat_type]), target_lines=1):\n+            for sub_feature in rec.features:\n+                start = sub_feature.location.start\n+                end = sub_feature.location.end\n+                strand = strandardize(sub_feature.location.strand)\n+                try:\n+                    geneid = sub_feature.qualifiers[idattr][0]\n+                except KeyError:\n+                    print("No \'" + idattr + "\' attribute found for the feature at position "\n+                          + rec.id + ":" + str(start) + ":" + str(end) + ". 
Please check your GTF/GFF file.")\n+                    continue\n+\n+                annotation[geneid][\'chr\'] = rec.id\n+                annotation[geneid][\'strand\'] = strand\n+                if annotation[geneid][\'start\'] == \'NA\' or start <= int(annotation[geneid][\'start\']):\n+                    annotation[geneid][\'start\'] = start\n+                if annotation[geneid][\'end\'] == \'NA\' or end >= int(annotation[geneid][\'end\']):\n+                    annotation[geneid][\'end\'] = end\n+\n+                for attr in attributes:\n+                    if attr in annotation[geneid]:\n+                        continue\n+                    try:\n+                        annotation[geneid][attr] = sub_feature.qualifiers[attr][0]\n+                    except KeyError:\n+                        annotation[geneid][attr] = \'NA\'\n+                # extract exon information only in case of dexseq output\n+                if input_type != "dexseq":\n+                    continue\n+                try:\n+                    txid = sub_feature.qualifiers[txattr][0]\n+                    tx_info[txid][\'chr\'] = rec.id\n+                    tx_info[txid][\'strand\'] = strand\n+                    exon_pos[txid][int(start)][int(end)] = 1\n+                except KeyError:\n+                    print("No \'" + txattr + "\' attribute found for the feature at position " + rec.id + ":" + str(\n+                        start) + ":" + str(end) + ". 
Please check your GTF/GFF file.")\n+                    pass\n+\n+    bed_entries = []\n+    # create BED lines only for deseq output\n+    if input_type == "dexseq":\n+        for txid in exon_pos.keys():\n+            starts = sorted(exon_pos[txid])\n+            strand = tx_info[txid][\'strand\']\n+            if strand == \'-\':\n+                starts = reversed(starts)\n+            for c, start in enumerate(starts, 1):\n+                ends = sorted(exon_pos[txid][start])\n+                if strand == \'-\':\n+                    ends = reversed(ends)\n+                for end in ends:\n+                    bed_entries.append(\'\\t\'.join([tx_info[txid][\'chr\'], str(start), str(end),\n+                                                  txid + \':\' + str(c), \'0\', strand]))\n+\n+    return annotation, bed_entries\n+\n+\n+def main():\n+    parser = argparse.ArgumentParser(description=\'Annotate DESeq2/DEXSeq tables with information from GFF/GTF files\')\n+    parser.add_argument(\'-in\', \'--input\', required=True,\n+                        help=\'DESeq2/DEXSeq output. It is allowed to have extra information, \'\n+                             \'but make'..b'This should match the first column of DESeq2 output(default: transcript_id)\')\n+    parser.add_argument(\'-a\', \'--attributes\', default=\'gene_biotype, gene_name\', required=False,\n+                        help=\'Comma separated attributes to include in output. 
Default: gene_biotype, gene_name\')\n+    parser.add_argument(\'-o\', \'--output\', required=True, help=\'Output file\')\n+    args = parser.parse_args()\n+\n+    print("DE(X)Seq output file     : %s" % args.input)\n+    print("Input file type          : %s" % args.mode)\n+    print("Annotation file          : %s" % args.gff)\n+    print("Feature type             : %s" % args.type)\n+    print("ID attribute             : %s" % args.idattr)\n+    print("Transcript attribute     : %s" % args.txattr)\n+    print("Attributes to include    : %s" % args.attributes)\n+    print("Annotated output file    : %s" % args.output)\n+\n+    attr = [x.strip() for x in args.attributes.split(\',\')]\n+    annotation, bed_entries = gff_to_dict(args.gff, args.type, args.idattr, args.txattr, attr, args.mode)\n+\n+    d_binexon = {}\n+    skip_exon_annotation = False\n+\n+    if args.mode == "dexseq":\n+        with open(args.input) as fh_input, open("input.bed", "w") as fh_input_bed:\n+            for line in fh_input:\n+                f = line.split(\'\\t\')\n+                fh_input_bed.write(\'\\t\'.join([f[11], f[12], f[13], f[0], "0", f[15]]) + "\\n")\n+\n+        if len(bed_entries) == 0 and args.mode == "dexseq":\n+            print("It seems there are no transcript ids present in GFF file. 
Skipping exon annotation.")\n+            skip_exon_annotation = True\n+\n+        if not skip_exon_annotation:\n+            with open("annotation.bed", "w") as fh_annotation_bed:\n+                for line in bed_entries:\n+                    fh_annotation_bed.write(line + "\\n")\n+\n+            # interset the DEXseq couting bins with exons in the GFF file\n+            # overlaped positions can be later used to infer which bin corresponds to which exon\n+            os.system("intersectBed -wo -s -a input.bed -b annotation.bed > overlap.txt")\n+\n+            with open("overlap.txt") as fh_overlap:\n+                for line in fh_overlap:\n+                    binid = line.split(\'\\t\')[3]\n+                    exonid = line.split(\'\\t\')[9]\n+                    d_binexon.setdefault(binid, []).append(exonid)\n+\n+    with open(args.input) as fh_input, open(args.output, \'w\') as fh_output:\n+        for line in fh_input:\n+            annot = []\n+            # Append the extra information from GFF to DESeq2 output\n+            if args.mode == "deseq2":\n+                geneid = line.split(\'\\t\')[0]\n+                annot = [str(annotation[geneid][\'chr\']),\n+                         str(annotation[geneid][\'start\']),\n+                         str(annotation[geneid][\'end\']),\n+                         str(annotation[geneid][\'strand\'])]\n+                for a in attr:\n+                    annot.append(annotation[geneid][a])\n+            # DEXSeq exonic bins might originate from aggrigating multiple genes. 
They are are separated by \'+\'\n+            # Append the attributes from the GFF but keep the order of the aggregated genes and use \'+\'\n+            # Aappend the transcript id and exon number from the annotation that correspond to the DEXseq counting bins\n+            elif args.mode == "dexseq":\n+                geneids = line.split(\'\\t\')[1].split(\'+\')\n+                for a in attr:\n+                    tmp = []\n+                    for geneid in geneids:\n+                        tmp.append(str(annotation[geneid][a]))\n+                    annot.append(\'+\'.join(tmp))\n+                if not skip_exon_annotation:\n+                    binid = line.split(\'\\t\')[0]\n+                    try:\n+                        annot.append(\',\'.join(sorted(set(d_binexon[binid]))))\n+                    except KeyError:\n+                        annot.append(\'NA\')\n+            fh_output.write(line.rstrip(\'\\n\') + \'\\t\' + \'\\t\'.join(annot) + \'\\n\')\n+\n+\n+if __name__ == "__main__":\n+    main()\n'
b
diff -r 000000000000 -r b42373cddb77 deg_annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deg_annotate.xml Fri Nov 23 01:59:47 2018 -0500
[
@@ -0,0 +1,125 @@
+<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.0">
+    <description>Append useful information from annotation files to DESeq2/DEXSeq outputs</description>
+    <requirements>
+        <requirement type="package" version="2.27.0">bedtools</requirement>
+        <requirement type="package" version="0.6.4">bcbiogff</requirement>
+    </requirements>
+    <command>
+    <![CDATA[
+        python '$__tool_directory__/deg_annotate.py' -in '$input_table'
+        -m '$mode'
+        -g '$annotation'
+        -t '$advanced_parameters.gff_feature_type'
+        -i '$advanced_parameters.gff_feature_attribute'
+        -x '$advanced_parameters.gff_transcript_attribute'
+        -a '$advanced_parameters.gff_attributes'
+        -o '$output'
+    ]]>
+    </command>
+    <inputs>
+        <param name="input_table"
+               type="data"
+               format="tabular"
+               argument="-in"
+               label="Tabular output of DESeq2 or DEXSeq"/>
+
+        <param name="mode" type="select" argument="-m" label="Input file type">
+                <option value="deseq2">DESeq2</option>
+                <option value="dexseq">DEXseq</option>
+        </param>
+
+        <param name="annotation"
+               type="data"
+               format="gff,gtf,gff3"
+               argument="-g"
+               label="Reference annotation in GFF/GTF format" />
+
+        <section name="advanced_parameters" title="Advanced options">
+            <param name="gff_feature_type"
+                type="text"
+                value="exon"
+                argument="-t"
+                label="GFF feature type"
+                help="This is the 3rd column in GFF file. Only rows which have the matched feature type in the GTF annotation file will be included. `exon' by default." />
+
+            <param name="gff_feature_attribute"
+                type="text"
+                value="gene_id"
+                argument="-i"
+                label="GFF feature identifier"
+                help="GFF attribute to be used as feature identifier. The value of this attribute should match the first column of DESeq2 output (default: gene_id)" />
+
+            <param name="gff_transcript_attribute"
+                type="text"
+                value="transcript_id"
+                argument="-x"
+                label="GFF transcript identifier"
+                help="GFF attribute to be used as transcript identifier. This option is only used for DEXSeq output annotation. Exon numbers are counted for each transcript separately (default: transcript_id)" />
+
+            <param name="gff_attributes"
+                type="text"
+                value="gene_biotype, gene_name"
+                argument="-a"
+                label="GFF attributes to include"
+                help="Comma-separated list of attributes from the GFF file to include in the output. These attributes should be associated with your chosen GFF feature type." />
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_table"
+                value="deseq2_output.tabular"/>
+            <param name="annotation"
+               value="annotation.gtf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="FBgn0025111\t2192.32236942864\t2.69993841720991\t0.0979447231457099\t27.565940568266\t2.8504782974107e-167\t6.1121380892229e-164\tchrX\t10778953\t10786907\t-\tprotein_coding\tAnt2"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_table"
+                value="dexseq_output.tabular"/>
+            <param name="annotation"
+               value="annotation.gtf"/>
+            <param name="mode"
+               value="dexseq"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="FBgn0025111\+FBgn0003360:E005\tFBgn0025111\+FBgn0003360\tE005\t0.273966640920426\t6.62572321505791\t0.774068626605711\t0.378961325638675\tNA\t0.41523701984849\t1.17020080867011\t2.99101950917789\tchrX\t10780596\t10780661\t66\t-\t10\t0\t0\t0\t0\t0\t2\tFBtr0073425, FBtr0333963\tprotein_coding\+protein_coding\tAnt2\+sesB\tFBtr0073425:1,FBtr0333963:1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>
+       <![CDATA[
+
+**What it does**
+
+    This tool extends the output table of DESeq2 or DEXSeq with gene symbols, biotypes, positions, etc. The information
+    you want to add is configurable. This information should be present in the input GTF/GFF file as attributes of the
+    feature you choose.
+    The DEXSeq-Count tool is used to prepare the DEXSeq-compatible annotation (flattened GTF file) from the input GTF/GFF.
+    In this process, exons that appear multiple times, once for each transcript, are collapsed into so-called
+    *exon counting bins*. Counting bins for parts of exons arise when an exonic region appears with different
+    boundaries in different transcripts. The resulting flattened GTF file contains pseudo exon ids per gene instead
+    of per transcript. This tool maps the DEXSeq counting bins back to the original exon ids. This mapping is only
+    possible if the input GTF/GFF file contains a transcript identifier attribute for the chosen feature type.
+
+**Inputs**
+
+**Differential gene expression tables**
+    At the moment, this tool supports DESeq2 and DEXSeq tool outputs.
+
+**Annotation**
+    Annotation file in GTF or GFF3 format that was used for counting.
+
+**Outputs**
+    Input tabular file with the chosen attributes appended as additional columns.
+
+        ]]>
+    </help>
+</tool>
b
diff -r 000000000000 -r b42373cddb77 test-data/annotation.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation.gtf Fri Nov 23 01:59:47 2018 -0500
b
b'@@ -0,0 +1,360 @@\n+chr3R\tFlyBase\tgene\t6762593\t6765261\t.\t+\t.\tgene_id "FBgn0000071"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding";\n+chr3R\tFlyBase\ttranscript\t6762593\t6765261\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\texon\t6762593\t6762996\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; exon_number "1"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0081618-E1";\n+chr3R\tFlyBase\tCDS\t6762980\t6762996\t.\t+\t0\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; exon_number "1"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; protein_id "FBpp0081135";\n+chr3R\tFlyBase\tstart_codon\t6762980\t6762982\t.\t+\t0\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; exon_number "1"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\texon\t6763833\t6765261\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; exon_number "2"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0081618-E2";\n+chr3R\tFlyBase\tCDS\t6763833\t6764838\t.\t+\t1\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; exon_number "2"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; protein_id 
"FBpp0081135";\n+chr3R\tFlyBase\tstop_codon\t6764839\t6764841\t.\t+\t0\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; exon_number "2"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\tfive_prime_utr\t6762593\t6762979\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\tthree_prime_utr\t6764842\t6765261\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081618"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\ttranscript\t6763404\t6765261\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081619"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\texon\t6763404\t6763516\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081619"; exon_number "1"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0081619-E1";\n+chr3R\tFlyBase\texon\t6763833\t6765261\t.\t+\t.\tgene_id "FBgn0000071"; transcript_id "FBtr0081619"; exon_number "2"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0081619-E2";\n+chr3R\tFlyBase\tCDS\t6763840\t6764838\t.\t+\t0\tgene_id "FBgn0000071"; transcript_id "FBtr0081619"; exon_number "2"; gene_name "Ama"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ama-RA"; transcript_source "FlyBase"; 
transcript_biotype "protein_coding"; protein_id "FBpp0081136";\n+chr3R\tFlyBase\tstart_codon\t6763840\t6763842\t.\t+\t0\tgene_id "FBgn0000071"; transcript_id "FBtr0081619"; exon_number "2"; gene_nam'..b'pp0085117";\n+chr3R\tFlyBase\texon\t31202355\t31202678\t.\t+\t.\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "9"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0085755-E9";\n+chr3R\tFlyBase\tCDS\t31202355\t31202678\t.\t+\t2\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "9"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; protein_id "FBpp0085117";\n+chr3R\tFlyBase\texon\t31203048\t31203236\t.\t+\t.\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "10"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0085755-E10";\n+chr3R\tFlyBase\tCDS\t31203048\t31203236\t.\t+\t2\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "10"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; protein_id "FBpp0085117";\n+chr3R\tFlyBase\texon\t31203372\t31203457\t.\t+\t.\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "11"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0085755-E11";\n+chr3R\tFlyBase\tCDS\t31203372\t31203457\t.\t+\t2\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "11"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; 
transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; protein_id "FBpp0085117";\n+chr3R\tFlyBase\texon\t31203542\t31203722\t.\t+\t.\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "12"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; exon_id "FBtr0085755-E12";\n+chr3R\tFlyBase\tCDS\t31203542\t31203643\t.\t+\t0\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "12"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; protein_id "FBpp0085117";\n+chr3R\tFlyBase\tstop_codon\t31203644\t31203646\t.\t+\t0\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; exon_number "12"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\tfive_prime_utr\t31196916\t31197011\t.\t+\t.\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3R\tFlyBase\tthree_prime_utr\t31203647\t31203722\t.\t+\t.\tgene_id "FBgn0039827"; transcript_id "FBtr0085755"; gene_name "CG1544"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1544-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";\n+chr3L\tFlyBase\tgene\t820759\t821512\t.\t+\t.\tgene_id "FBgn0264475"; gene_name "CR43883"; gene_source "FlyBase"; gene_biotype "lincRNA";\n+chr3L\tFlyBase\ttranscript\t820759\t821512\t.\t+\t.\tgene_id "FBgn0264475"; transcript_id "FBtr0332751"; gene_name "CR43883"; gene_source "FlyBase"; gene_biotype "lincRNA"; transcript_name "CR43883-RA"; transcript_source "FlyBase"; 
transcript_biotype "lincRNA";\n+chr3L\tFlyBase\texon\t820759\t821512\t.\t+\t.\tgene_id "FBgn0264475"; transcript_id "FBtr0332751"; exon_number "1"; gene_name "CR43883"; gene_source "FlyBase"; gene_biotype "lincRNA"; transcript_name "CR43883-RA"; transcript_source "FlyBase"; transcript_biotype "lincRNA"; exon_id "FBtr0332751-E1";\n'
b
diff -r 000000000000 -r b42373cddb77 test-data/deseq2_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/deseq2_output.tabular Fri Nov 23 01:59:47 2018 -0500
b
@@ -0,0 +1,10 @@
+FBgn0039155 1086.97429520489 -4.14844887121203 0.134949009976641 -30.7408618405583 1.61992743879e-207 1.38941176425019e-203
+FBgn0003360 6409.57712820784 -2.99977687808929 0.104345174652411 -28.7485922380405 9.43213642530181e-182 4.04497170599068e-178
+FBgn0026562 65114.8405637953 -2.38016378555818 0.0843270368933908 -28.22539334054 2.85397270642668e-175 8.15950796767388e-172
+FBgn0025111 2192.32236942864 2.69993841720991 0.0979447231457099 27.565940568266 2.8504782974107e-167 6.1121380892229e-164
+FBgn0029167 5430.06727658048 -2.1050612887726 0.0925467282777971 -22.7459287642654 1.57453789049645e-114 2.70096229735761e-111
+FBgn0039827 390.901782011095 -3.50301240162969 0.160029809280641 -21.88974927469 3.25298224788205e-106 4.6501381233474e-103
+FBgn0035085 928.263812261588 -2.41407351603462 0.115185333184648 -20.9581675834087 1.58055521794036e-97 1.93663172918206e-94
+FBgn0034736 330.38302328757 -3.01817719625006 0.158154372479841 -19.0837417197224 3.44677215747821e-81 3.69537059933632e-78
+FBgn0264475 955.454453674265 -2.33448569609426 0.124230191435521 -18.7916131265557 8.84494667012502e-79 8.42923417662915e-76
+FBgn0000071 468.057925667952 2.36001641134183 0.13564419237713 17.3985805804372 8.45727146596554e-68 7.25380173635864e-65
b
diff -r 000000000000 -r b42373cddb77 test-data/dexseq_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dexseq_output.tabular Fri Nov 23 01:59:47 2018 -0500
b
b'@@ -0,0 +1,74 @@\n+FBgn0000071:E001\tFBgn0000071\tE001\t0.0192506486025934\t14\t0.0199243738540247\t0.887748502701671\tNA\t0.0270972419820765\t0.26215027289167\t6.5484386673406\tchr3R\t6762593\t6762996\t404\t+\t1\t0\t0\t0\t0\t0\t0\tFBtr0081618\n+FBgn0000071:E002\tFBgn0000071\tE002\t0.363216696948006\t5.84518530646982\t0.0113232157782903\t0.915256583298\tNA\t1.1031345072124\t0.623779161487839\t-1.64605496381894\tchr3R\t6763404\t6763500\t97\t+\t7\t0\t0\t7\t0\t0\t2\tFBtr0081619, FBtr0081620\n+FBgn0000071:E003\tFBgn0000071\tE003\t0.784502185963002\t4.82131756123766\t0.00622165600043445\t0.937130080067176\tNA\t1.03116251478534\t1.16708916978339\t0.357665857300601\tchr3R\t6763501\t6763516\t16\t+\t31\t0\t0\t7\t0\t0\t1\tFBtr0081619\n+FBgn0000071:E004\tFBgn0000071\tE004\t26.3510395213692\t1.96800519978103\t0.00561951934103888\t0.940243790029249\t1\t5.42399545115501\t5.87836276689312\t0.238607928271459\tchr3R\t6763833\t6765261\t1429\t+\t965\t0\t1\t183\t0\t0\t82\tFBtr0081619, FBtr0081618, FBtr0081620\n+FBgn0025111+FBgn0003360:E001\tFBgn0025111+FBgn0003360\tE001\t90.0645737707993\t0.0126372415767218\t266.729686621202\t5.85700535276357e-60\t7.03514197947196e-56\t7.55262108655923\t19.0700856066207\t3.0486661948835\tchrX\t10778954\t10779695\t742\t-\t3297\t1\t0\t555\t1\t0\t341\tFBtr0073425, FBtr0073426, FBtr0333963\n+FBgn0025111+FBgn0003360:E002\tFBgn0025111+FBgn0003360\tE002\t12.6315028999338\t0.0374837624127616\t69.0471690350968\t9.6136433108249e-17\t1.53965702170631e-13\t2.85427512792234\t8.93432009189011\t3.38287587486196\tchrX\t10779993\t10780120\t128\t-\t483\t0\t0\t54\t0\t0\t54\tFBtr0073425, FBtr0073426, FBtr0333963\n+FBgn0025111+FBgn0003360:E003\tFBgn0025111+FBgn0003360\tE003\t27.0879920211274\t0.0102790782383009\t240.828983563173\t2.59388895137782e-54\t2.07709980929831e-50\t4.28496921256544\t13.1457280028538\t3.42758025357735\tchrX\t10780197\t10780526\t330\t-\t1025\t0\t0\t135\t0\t0\t111\tFBtr0073425, FBtr0073426, 
FBtr0333963\n+FBgn0025111+FBgn0003360:E004\tFBgn0025111+FBgn0003360\tE004\t0.571953699696933\t3.19738473523419\t1.35362539704593\t0.244645318616069\tNA\t0.664852761525701\t1.62420816847541\t2.58005011767578\tchrX\t10780527\t10780595\t69\t-\t19\t0\t0\t4\t0\t0\t3\tFBtr0073425\n+FBgn0025111+FBgn0003360:E005\tFBgn0025111+FBgn0003360\tE005\t0.273966640920426\t6.62572321505791\t0.774068626605711\t0.378961325638675\tNA\t0.41523701984849\t1.17020080867011\t2.99101950917789\tchrX\t10780596\t10780661\t66\t-\t10\t0\t0\t0\t0\t0\t2\tFBtr0073425, FBtr0333963\n+FBgn0025111+FBgn0003360:E006\tFBgn0025111+FBgn0003360\tE006\t0.745311945942461\t5.357718424361\t1.46992595946872\t0.225357375991809\tNA\t0.504555945672202\t1.65071837177629\t3.42315484512434\tchrX\t10780893\t10780992\t100\t-\t3\t1\t0\t0\t0\t0\t3\tFBtr0073424\n+FBgn0025111+FBgn0003360:E007\tFBgn0025111+FBgn0003360\tE007\t545.847775517443\t0.00833350838114172\t207.741013858374\t4.27292204331129e-47\t2.56621015616168e-43\t23.6260619029107\t12.5792144461011\t-2.2978357205501\tchrX\t10780993\t10782095\t1103\t-\t872\t0\t0\t10527\t11\t2\t7403\tFBtr0073424, FBtr0346384, FBtr0073421, FBtr0073423, FBtr0073422\n+FBgn0025111+FBgn0003360:E008\tFBgn0025111+FBgn0003360\tE008\t81.6833873705682\t0.211161242432226\t2.10416871447788\t0.146898156016142\t1\t10.4870761158077\t4.81409558326169\t-2.35561606761604\tchrX\t10782173\t10782300\t128\t-\t149\t0\t0\t759\t1\t1\t1518\tFBtr0073424, FBtr0346384, FBtr0073421, FBtr0073423, FBtr0073422\n+FBgn0025111+FBgn0003360:E009\tFBgn0025111+FBgn0003360\tE009\t176.258300135302\t0.0140072450761411\t57.2712260328214\t3.7966937524597e-14\t5.7004983759587e-11\t15.7453363154864\t8.15612049317458\t-2.12238613649047\tchrX\t10782380\t10782680\t301\t-\t316\t0\t0\t3595\t6\t0\t2253\tFBtr0073424, FBtr0346384, FBtr0073421, FBtr0073423, 
FBtr0073422\n+FBgn0025111+FBgn0003360:E010\tFBgn0025111+FBgn0003360\tE010\t3.76512107787728\t1.6130831872072\t1.24033810568785\t0.265405902564654\tNA\t1.68455418493543\t3.44295065004409\t2.07400290277306\tchrX\t10782777\t10782889\t113\t-\t102\t0\t0\t28\t0\t1\t16\tFBtr0073424, FBtr0073422\n+FBgn0025111+FBgn0003360:E011\tFBgn0025111+FBgn0003360\tE011\t6.5089881698589\t1.02475799543533\t-0.106580642591283\t1\tNA\t3.07929058112916\t2.2118244557951\t-0.960549001137653\tchrX\t10784351\t10784458\t108\t-\t31\t0\t0\t111\t1\t0\t76\tFBtr0073424, FBtr0073423\n+FBgn0025111+FBgn0003360:E012\tFBgn0025111+FBgn0003360\tE'..b'2215489960084\t1\t4.90177560166337\t5.15884080320152\t0.150757731546162\tchr3R\t24145193\t24145870\t678\t+\t29\t0\t0\t750\t0\t0\t648\tFBtr0084549\n+FBgn0039155:E003\tFBgn0039155\tE003\t9.59400268282558\t0.106779096337916\t0.908702915845097\t0.340458807683609\t1\t2.295297222532\t1.37949280433586\t-1.47336111077149\tchr3R\t24145930\t24146048\t119\t+\t2\t0\t0\t135\t0\t0\t165\tFBtr0084549\n+FBgn0039155:E004\tFBgn0039155\tE004\t21.818733441502\t0.0102195118081709\t0.346088392922738\t0.556335792123203\t1\t3.72178079518322\t3.23201345868211\t-0.411439521105551\tchr3R\t24146109\t24146302\t194\t+\t11\t0\t0\t389\t1\t0\t318\tFBtr0084549\n+FBgn0039155:E005\tFBgn0039155\tE005\t33.6638874829461\t0.0192821592660858\t1.63087827191582\t0.201581614240006\t1\t4.34400029242237\t3.65686503916905\t-0.503801897354084\tchr3R\t24146366\t24146739\t374\t+\t13\t1\t0\t682\t2\t0\t431\tFBtr0084549\n+FBgn0039155:E006\tFBgn0039155\tE006\t6.29083274168577\t0.0252516535063752\t3.7437428395564\t0.0530055867882575\tNA\t1.95416666238726\t2.78389648685824\t1.02609607038693\tchr3R\t24146801\t24146867\t67\t+\t7\t1\t0\t113\t0\t0\t79\tFBtr0084549\n+FBgn0039155:E007\tFBgn0039155\tE007\t12.3243820234843\t0.0262778274885892\t2.13624519448128\t0.143853608869934\t1\t2.67736324784513\t3.39457417755365\t0.690361764818206\tchr3R\t24147196\t24147490\t295\t+\t11\t1\t0\t238\t0\t1\t147\tFBtr0084549\n+FBgn0039827:E00
1\tFBgn0039827\tE001\t2.63498804451882\t0.0608176505616557\t0.414823799440974\t0.519531233750519\tNA\t1.59698175719074\t1.14605646107818\t-0.958911104309699\tchr3R\t31196916\t31197135\t220\t+\t2\t0\t0\t48\t0\t0\t39\tFBtr0085755\n+FBgn0039827:E002\tFBgn0039827\tE002\t2.05937277346515\t1.77137857854951\t0.458134137387447\t0.498497279931254\tNA\t1.19223115015567\t0.0433668339050932\t-9.56366021393415\tchr3R\t31199171\t31199288\t118\t+\t0\t0\t0\t36\t0\t0\t32\tFBtr0085755\n+FBgn0039827:E003\tFBgn0039827\tE003\t1.93543290374953\t0.123855592007312\t0.400739231241019\t0.526707733863682\tNA\t1.35176238289198\t0.81056910682808\t-1.47715610462916\tchr3R\t31199457\t31199634\t178\t+\t1\t0\t0\t35\t0\t0\t29\tFBtr0085755\n+FBgn0039827:E004\tFBgn0039827\tE004\t3.28985825548349\t0.0893690013877094\t1.3505968811039\t0.245173796359894\tNA\t1.58175203407803\t2.19601130375313\t0.949671751426194\tchr3R\t31199721\t31199940\t220\t+\t6\t0\t1\t59\t0\t0\t33\tFBtr0085755\n+FBgn0039827:E005\tFBgn0039827\tE005\t3.71984502435892\t0.105905873684463\t0.107135509118166\t0.743429186401977\tNA\t1.7435517258432\t1.60688982797305\t-0.236098009333743\tchr3R\t31200404\t31200565\t162\t+\t4\t0\t0\t65\t1\t0\t44\tFBtr0085755\n+FBgn0039827:E006\tFBgn0039827\tE006\t1.49855059052996\t0.142636861571954\t0.251438625597252\t0.616063913732651\tNA\t1.17582262214242\t1.39184890589876\t0.487369179091324\tchr3R\t31200909\t31200982\t74\t+\t3\t0\t0\t24\t0\t0\t23\tFBtr0085755\n+FBgn0039827:E007\tFBgn0039827\tE007\t9.62489678410572\t0.0167104742226843\t0.0291662271828628\t0.864395791606263\t1\t2.95394188431291\t2.81385568422878\t-0.141211731937923\tchr3R\t31201059\t31201820\t762\t+\t11\t1\t0\t175\t0\t0\t127\tFBtr0085755\n+FBgn0039827:E008\tFBgn0039827\tE008\t8.16018283400104\t0.0168806044751327\t1.4042102686353\t0.236019904522537\t1\t2.69695467533777\t3.13897231666437\t0.44119600007341\tchr3R\t31201881\t31202298\t418\t+\t14\t0\t1\t142\t0\t0\t106\tFBtr0085755\n+FBgn0039827:E009\tFBgn0039827\tE009\t6.36726616885434\t0.02415704
1511076\t1.69280647573429\t0.193231426709786\tNA\t2.48935040136838\t1.81054974057187\t-0.922389941550072\tchr3R\t31202355\t31202678\t324\t+\t5\t0\t0\t126\t0\t0\t89\tFBtr0085755\n+FBgn0039827:E010\tFBgn0039827\tE010\t3.61313432079147\t0.0615567372635995\t0.118050038473861\t0.731159078754113\tNA\t1.82539909664026\t1.96967561924679\t0.220187796338587\tchr3R\t31203048\t31203236\t189\t+\t6\t0\t0\t56\t0\t0\t57\tFBtr0085755\n+FBgn0039827:E011\tFBgn0039827\tE011\t1.99064386054588\t0.100802457520676\t0.320881415004813\t0.571078427721341\tNA\t1.35667514561751\t1.60773535197048\t0.490854902982875\tchr3R\t31203372\t31203457\t86\t+\t4\t0\t0\t31\t0\t0\t31\tFBtr0085755\n+FBgn0039827:E012\tFBgn0039827\tE012\t2.19810573129925\t0.0718896852308745\t0.192314515449624\t0.660997207527994\tNA\t1.44207416809348\t1.61260405533928\t0.323155632427585\tchr3R\t31203542\t31203722\t181\t+\t4\t0\t0\t37\t0\t0\t33\tFBtr0085755\n+FBgn0264475:E001\tFBgn0264475\tE001\t109.782378649456\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tchr3L\t820759\t821512\t754\t+\t348\t1\t0\t1911\t4\t0\t1484\tFBtr0332751\n'