Galaxy |

Changeset 1:e98d4ab5b5bc (2019-01-23)

Previous changeset 0:b42373cddb77 (2018-11-23) Next changeset 2:e81fbbea6e8f (2025-06-21)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit 44d3dae188cabf4a64dee7c1ebe41c855d95d1b0

modified:
deg_annotate.py
deg_annotate.xml

added:
test-data/edger_output.tabular

diff -r b42373cddb77 -r e98d4ab5b5bc deg_annotate.py
--- a/deg_annotate.py Fri Nov 23 01:59:47 2018 -0500
+++ b/deg_annotate.py Wed Jan 23 07:47:22 2019 -0500

[

@@ -64,7 +64,7 @@
                     pass

     bed_entries = []
-    # create BED lines only for deseq output
+    # create BED lines only for DEXSeq output
     if input_type == "dexseq":
         for txid in exon_pos.keys():
             starts = sorted(exon_pos[txid])
@@ -87,8 +87,8 @@
     parser.add_argument('-in', '--input', required=True,
                         help='DESeq2/DEXSeq output. It is allowed to have extra information, '
                              'but make sure that the original output columns are not altered')
-    parser.add_argument('-m', '--mode', required=True, choices=["deseq2", "dexseq"], default='deseq2',
-                        help='Input file type')
+    parser.add_argument('-m', '--mode', required=True, choices=["degseq", "dexseq"],
+                        default='degseq', help='Input file type')
     parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting')
     parser.add_argument('-t', '--type', default='exon', required=False,
                         help='feature type (3rd column in GFF file) to be used (default: exon)')
@@ -146,19 +146,10 @@
     with open(args.input) as fh_input, open(args.output, 'w') as fh_output:
         for line in fh_input:
             annot = []
-            # Append the extra information from GFF to DESeq2 output
-            if args.mode == "deseq2":
-                geneid = line.split('\t')[0]
-                annot = [str(annotation[geneid]['chr']),
-                         str(annotation[geneid]['start']),
-                         str(annotation[geneid]['end']),
-                         str(annotation[geneid]['strand'])]
-                for a in attr:
-                    annot.append(annotation[geneid][a])
             # DEXSeq exonic bins might originate from aggregating multiple genes. They are separated by '+'
             # Append the attributes from the GFF but keep the order of the aggregated genes and use '+'
             # Append the transcript id and exon number from the annotation that correspond to the DEXSeq counting bins
-            elif args.mode == "dexseq":
+            if args.mode == "dexseq":
                 geneids = line.split('\t')[1].split('+')
                 for a in attr:
                     tmp = []
@@ -171,6 +162,15 @@
                         annot.append(','.join(sorted(set(d_binexon[binid]))))
                     except KeyError:
                         annot.append('NA')
+            # Append the extra information from GFF to DESeq2/edgeR/limma output
+            else:
+                geneid = line.split('\t')[0]
+                annot = [str(annotation[geneid]['chr']),
+                         str(annotation[geneid]['start']),
+                         str(annotation[geneid]['end']),
+                         str(annotation[geneid]['strand'])]
+                for a in attr:
+                    annot.append(annotation[geneid][a])
             fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n')

diff -r b42373cddb77 -r e98d4ab5b5bc deg_annotate.xml
--- a/deg_annotate.xml Fri Nov 23 01:59:47 2018 -0500
+++ b/deg_annotate.xml Wed Jan 23 07:47:22 2019 -0500

@@ -1,5 +1,5 @@
-<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.0">
-    <description>Append useful information from annotation files to DESeq2/DEXSeq outputs</description>
+<tool id="deg_annotate" name="Annotate DESeq2/DEXSeq output tables" version="1.1.0">
+    <description>Append annotation from GTF to differential expression tool outputs</description>
     <requirements>
         <requirement type="package" version="2.27.0">bedtools</requirement>
         <requirement type="package" version="0.6.4">bcbiogff</requirement>
@@ -21,10 +21,10 @@
                type="data"
                format="tabular"
                argument="-in"
-               label="Tabular output of DESeq2 or DEXSeq"/>
+               label="Tabular output of DESeq2/edgeR/limma/DEXSeq"/>

         <param name="mode" type="select" argument="-m" label="Input file type">
-                <option value="deseq2">DESeq2</option>
+                <option value="degseq">DESeq2/edgeR/limma</option>
                 <option value="dexseq">DEXseq</option>
         </param>

@@ -92,6 +92,19 @@
                 </assert_contents>
             </output>
         </test>
+        <test expect_num_outputs="1">
+            <param name="input_table"
+                value="edger_output.tabular"/>
+            <param name="annotation"
+               value="annotation.gtf"/>
+            <param name="mode"
+               value="degseq"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text_matching expression="FBgn0039155\t-4.40480020002641\t5.8344799947229\t573.433304439283\t1.62187751744916e-36\t2.54342832286378e-32\tchr3R\t24141394\t24147490\t\+\tprotein_coding\tKal1"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>

     <help>
@@ -99,7 +112,7 @@

**What it does**

-    This tool appends the output table of DESeq2 or DEXSeq with gene symbols, biotypes, positions etc. The information
+    This tool appends the output table of DESeq2/edgeR/limma/DEXSeq with gene symbols, biotypes, positions etc. The information
     you want to add is configurable. This information should be present in the input GTF/GFF file as attributes of the feature
     you choose.
     DEXSeq-Count tool is used to prepare the DEXSeq compatible annotation (flattened GTF file) from input GTF/GFF. In

diff -r b42373cddb77 -r e98d4ab5b5bc test-data/edger_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/edger_output.tabular Wed Jan 23 07:47:22 2019 -0500

@@ -0,0 +1,9 @@
+GeneID logFC logCPM F PValue FDR
+FBgn0039155 -4.40480020002641 5.8344799947229 573.433304439283 1.62187751744916e-36 2.54342832286378e-32
+FBgn0003360 -3.1582167817878 8.40195244059253 458.432046181347 2.23362781352968e-33 1.75138756858862e-29
+FBgn0025111 2.91430384030448 6.87729756446126 452.782656958761 4.06857731787388e-32 2.12678098329661e-28
+FBgn0029167 -2.27266685614162 8.1768807997853 280.441288717184 7.70643803794637e-27 3.02130903277688e-23
+FBgn0035085 -2.53273511974769 5.55909941151563 251.043462255658 1.90325815010934e-25 5.96937886200293e-22
+FBgn0264475 -2.4548323117841 5.63483344861135 228.705124005753 4.42554793229512e-24 1.15669071123753e-20
+FBgn0039827 -3.99649258355212 4.15961375651663 233.059587232752 9.19923669871639e-24 2.06089185584672e-20
+FBgn0000071 2.57846147731144 4.8005671380096 204.62749320464 5.74900194240093e-23 1.03034290930057e-19