Repository 'deseq2'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/deseq2

Changeset 20:89d26b11d452 (2018-12-06)
Previous changeset 19:c56e0689e46e (2018-12-04) Next changeset 21:a6fc9228e1a0 (2019-01-28)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 82fc6e1098b8af8b769ff07689704c5275b76459
modified:
deseq2.xml
get_deseq_dataset.R
added:
test-data/Homo_sapiens.GRCh38.94.gtf
test-data/sailfish_ensembl/sailfish_quant.sf1.tab
test-data/sailfish_ensembl/sailfish_quant.sf2.tab
test-data/sailfish_ensembl/sailfish_quant.sf3.tab
test-data/sailfish_ensembl/sailfish_quant.sf4.tab
test-data/sailfish_ensembl/sailfish_quant.sf5.tab
test-data/sailfish_ensembl/sailfish_quant.sf6.tab
removed:
test-data/out_deseq2_sailfish.tab
b
diff -r c56e0689e46e -r 89d26b11d452 deseq2.xml
--- a/deseq2.xml Tue Dec 04 08:19:06 2018 -0500
+++ b/deseq2.xml Thu Dec 06 15:49:22 2018 -0500
[
@@ -348,7 +348,7 @@
                 </assert_contents>
             </output>
         </test>
-        <!--Ensure Sailfish/Salmon input with GFF3 annotation works-->
+        <!--Ensure Sailfish/Salmon input with GFF3 annotation from NCBI works-->
         <test expect_num_outputs="1">
             <repeat name="rep_factorName">
                 <param name="factorName" value="Treatment"/>
@@ -372,6 +372,30 @@
                 </assert_contents>
             </output>
         </test>
+        <!--Ensure Sailfish/Salmon input with GTF annotation from Ensembl works-->
+        <test expect_num_outputs="1">
+            <repeat name="rep_factorName">
+                <param name="factorName" value="Treatment"/>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Treated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf1.tab,sailfish_ensembl/sailfish_quant.sf2.tab,sailfish_ensembl/sailfish_quant.sf3.tab"/>
+                </repeat>
+                <repeat name="rep_factorLevel">
+                    <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish_ensembl/sailfish_quant.sf4.tab,sailfish_ensembl/sailfish_quant.sf5.tab,sailfish_ensembl/sailfish_quant.sf6.tab"/>
+                </repeat>
+            </repeat>
+            <param name="pdf" value="False"/>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="gtf"/>
+            <param name="gtf_file" value="Homo_sapiens.GRCh38.94.gtf" ftype="gtf"/>
+            <output name="deseq_out" >
+                <assert_contents>
+                    <has_text_matching expression="ENSG00000168671\t1.8841.*\t-0.1180.*\t0.7429.*\t-0.1589.*\t0.8737.*\t0.9999.*" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark
b
diff -r c56e0689e46e -r 89d26b11d452 get_deseq_dataset.R
--- a/get_deseq_dataset.R Tue Dec 04 08:19:06 2018 -0500
+++ b/get_deseq_dataset.R Thu Dec 06 15:49:22 2018 -0500
[
@@ -57,13 +57,16 @@
         })
         txdb <- makeTxDbFromGFF(gffFile)
         k <- keys(txdb, keytype = "TXNAME")
-        tx2gene <- select(txdb, k, "GENEID", "TXNAME")
+        tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME")
+        # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name)
+        tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME)
       }
       try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene))
       if (!exists("txi")) {
-        # Remove version from transcript IDs
-        tx2gene$TXNAME <- sub('\\.[0-9]+', '', tx2gene$TXNAME)
-        txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)
+        # Remove version from transcript IDs in tx2gene...
+        tx2gene$TXNAME <- sub('\\.[0-9]+$', '', tx2gene$TXNAME)
+        # ...and in txiFiles
+        txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene, ignoreTxVersion=TRUE)
       }
       dds <- DESeqDataSetFromTximport(txi,
                                       subset(sampleTable, select=-c(filename)),
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/Homo_sapiens.GRCh38.94.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Homo_sapiens.GRCh38.94.gtf Thu Dec 06 15:49:22 2018 -0500
b
b'@@ -0,0 +1,178 @@\n+#!genome-build GRCh38.p12\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.27\n+#!genebuild-last-updated 2018-07\n+# Trimmed version of ftp://ftp.ensembl.org/pub/release-94/gtf/homo_sapiens/Homo_sapiens.GRCh38.94.gtf.gz\n+5\tensembl_havana\tgene\t36035017\t36071358\t.\t-\t.\tgene_id "ENSG00000168671"; gene_version "9"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding";\n+5\tensembl_havana\ttranscript\t36035017\t36066891\t.\t-\t.\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS3914"; tag "basic"; transcript_support_level "1";\n+5\tensembl_havana\texon\t36066696\t36066891\t.\t-\t.\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; exon_number "1"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS3914"; exon_id "ENSE00001299572"; exon_version "2"; tag "basic"; transcript_support_level "1";\n+5\tensembl_havana\tCDS\t36066696\t36066789\t.\t-\t0\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; exon_number "1"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS3914"; protein_id "ENSP00000282507"; protein_version "3"; tag "basic"; transcript_support_level "1";\n+5\tensembl_havana\tstart_codon\t36066787\t36066789\t.\t-\t0\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; exon_number "1"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS3914"; tag "basic"; transcript_support_level "1";\n+5\tensembl_havana\texon\t36064249\t36064350\t.\t-\t.\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; exon_number "2"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS3914"; exon_id "ENSE00003574795"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+5\tensembl_havana\tCDS\t36064249\t36064350\t.\t-\t2\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; exon_number "2"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS3914"; protein_id "ENSP00000282507"; protein_version "3"; tag "basic"; transcript_support_level "1";\n+5\tensembl_havana\texon\t36051870\t36051984\t.\t-\t.\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; exon_number "3"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS3914"; exon_id "ENSE00003558123"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+5\tensembl_havana\tCDS\t36051870\t36051984\t.\t-\t2\tgene_id "ENSG00000168671"; gene_version "9"; transcript_id "ENST00000282507"; transcript_version "7"; exon_number "3"; gene_name "UGT3A2"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "UGT3A2-201"; transcript_source "ensembl_havana"; transcript_biotype "protein'..b'g";\n+12\tensembl_havana\ttranscript\t54032853\t54035358\t.\t+\t.\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\texon\t54032853\t54033576\t.\t+\t.\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; exon_number "1"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; exon_id "ENSE00001190317"; exon_version "4"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\tCDS\t54033123\t54033576\t.\t+\t0\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; exon_number "1"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; protein_id "ENSP00000309336"; protein_version "2"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\tstart_codon\t54033123\t54033125\t.\t+\t0\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; exon_number "1"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\texon\t54034278\t54035358\t.\t+\t.\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; exon_number "2"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; exon_id "ENSE00002053096"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\tCDS\t54034278\t54034489\t.\t+\t2\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; exon_number "2"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; protein_id "ENSP00000309336"; protein_version "2"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\tstop_codon\t54034490\t54034492\t.\t+\t0\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; exon_number "2"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\tfive_prime_utr\t54032853\t54033122\t.\t+\t.\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; tag "basic"; transcript_support_level "1";\n+12\tensembl_havana\tthree_prime_utr\t54034493\t54035358\t.\t+\t.\tgene_id "ENSG00000172789"; gene_version "3"; transcript_id "ENST00000312492"; transcript_version "2"; gene_name "HOXC5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "HOXC5-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8872"; tag "basic"; transcript_support_level "1";\n'
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/out_deseq2_sailfish.tab
--- a/test-data/out_deseq2_sailfish.tab Tue Dec 04 08:19:06 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-MIR6859-2 1.1858265336986 -1.58325519934585 1.29566733255222 -1.22196119294536 0.221722302676964 0.886889210707857
-DDX11L1 1.91972630671259 -0.204757212641591 1.31524142129485 -0.155680325548148 0.876285006005361 0.999999999763967
-MIR6859-1 0.294068009390125 -0.712896459907293 1.28824575367762 -0.55338545294805 0.579999497913718 0.999999999763967
-WASH7P 114.545909292233 -3.75034693001422e-10 1.2677635335979 -2.95823852841924e-10 0.999999999763967 0.999999999763967
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/sailfish_ensembl/sailfish_quant.sf1.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sailfish_ensembl/sailfish_quant.sf1.tab Thu Dec 06 15:49:22 2018 -0500
b
@@ -0,0 +1,16 @@
+Name Length EffectiveLength TPM NumReads
+ENST00000303460.4 1957 1759.2 0 0
+ENST00000546378.1 2035 1837.2 0 0
+ENST00000243103.3 847 649.348 0 0
+ENST00000243056.4 2395 2555.05 0.0173509 1
+ENST00000303406.4 2305 2261.56 0.114436 5.83783
+ENST00000430889.2 1665 1561.63 3.55316 125.162
+ENST00000312492.2 1610 1412.2 0 0
+ENST00000512206.1 1644 1446.2 0 0
+ENST00000243108.4 1680 1482.2 0 0
+ENST00000394331.3 2077 1879.2 0 0
+ENST00000040584.5 2286 2088.2 0 0
+ENST00000303450.4 1539 1341.2 0 0
+ENST00000513300.5 2281 2071 0.085625 4
+ENST00000282507.7 2383 2153.11 0 0
+ENST00000504685.5 1851 1659.15 0 0
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/sailfish_ensembl/sailfish_quant.sf2.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sailfish_ensembl/sailfish_quant.sf2.tab Thu Dec 06 15:49:22 2018 -0500
b
@@ -0,0 +1,16 @@
+Name Length EffectiveLength TPM NumReads
+ENST00000303460.4 1957 1764.64 0 0
+ENST00000546378.1 2035 2171.69 0.0161522 1
+ENST00000243103.3 847 655.215 0 0
+ENST00000243056.4 2395 2567.91 0.01366 1
+ENST00000303406.4 2305 2235.74 0 0
+ENST00000430889.2 1665 1576.16 1.7359 78
+ENST00000312492.2 1610 1726.1 1.31761e-06 6.48367e-05
+ENST00000512206.1 1644 1704.36 0.0411608 1.99994
+ENST00000243108.4 1680 1679.47 0.10443 5
+ENST00000394331.3 2077 2069.83 0 0
+ENST00000040584.5 2286 2093.64 0 0
+ENST00000303450.4 1539 1557.46 0.0675669 3
+ENST00000513300.5 2281 2088.64 0 0
+ENST00000282507.7 2383 2190.64 0 0
+ENST00000504685.5 1851 1658.64 0 0
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/sailfish_ensembl/sailfish_quant.sf3.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sailfish_ensembl/sailfish_quant.sf3.tab Thu Dec 06 15:49:22 2018 -0500
b
@@ -0,0 +1,16 @@
+Name Length EffectiveLength TPM NumReads
+ENST00000303460.4 1957 1760.44 0 0
+ENST00000546378.1 2035 2127.85 0.0280759 2
+ENST00000243103.3 847 650.573 0 0
+ENST00000243056.4 2395 2198.44 0 0
+ENST00000303406.4 2305 2223.92 0.0661957 4.92838
+ENST00000430889.2 1665 1536.71 2.08127 107.072
+ENST00000312492.2 1610 1685.74 1.23894e-08 6.99193e-07
+ENST00000512206.1 1644 1658.72 0.0720329 4
+ENST00000243108.4 1680 1638.79 0.0546819 3
+ENST00000394331.3 2077 2026.99 0 0
+ENST00000040584.5 2286 2089.44 0 0
+ENST00000303450.4 1539 1342.44 0 0
+ENST00000513300.5 2281 2047.58 0 0
+ENST00000282507.7 2383 2143.36 0 0
+ENST00000504685.5 1851 1659.97 0.0359895 2
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/sailfish_ensembl/sailfish_quant.sf4.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sailfish_ensembl/sailfish_quant.sf4.tab Thu Dec 06 15:49:22 2018 -0500
b
@@ -0,0 +1,16 @@
+Name Length EffectiveLength TPM NumReads
+ENST00000303460.4 1957 1738.38 0 0
+ENST00000546378.1 2035 1816.38 0 0
+ENST00000243103.3 847 628.444 0 0
+ENST00000243056.4 2395 2176.38 0 0
+ENST00000303406.4 2305 2086.38 0 0
+ENST00000430889.2 1665 1510.43 2.31417 89.0334
+ENST00000312492.2 1610 1391.38 0 0
+ENST00000512206.1 1644 1425.38 0 0
+ENST00000243108.4 1680 1612.63 0.0730348 3
+ENST00000394331.3 2077 1998.27 0 0
+ENST00000040584.5 2286 2067.38 0 0
+ENST00000303450.4 1539 1320.38 0 0
+ENST00000513300.5 2281 2033.58 0 0
+ENST00000282507.7 2383 2129.01 0 0
+ENST00000504685.5 1851 1639.11 0.071855 3
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/sailfish_ensembl/sailfish_quant.sf5.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sailfish_ensembl/sailfish_quant.sf5.tab Thu Dec 06 15:49:22 2018 -0500
b
@@ -0,0 +1,16 @@
+Name Length EffectiveLength TPM NumReads
+ENST00000303460.4 1957 1781.23 0 0
+ENST00000546378.1 2035 1859.23 0 0
+ENST00000243103.3 847 671.305 0 0
+ENST00000243056.4 2395 2219.23 0 0
+ENST00000303406.4 2305 2195.37 0 0
+ENST00000430889.2 1665 1545.8 3.39643 119.002
+ENST00000312492.2 1610 1715.91 0.128558 5
+ENST00000512206.1 1644 1468.23 0 0
+ENST00000243108.4 1680 1654.16 0.0800141 3
+ENST00000394331.3 2077 2034.63 0 0
+ENST00000040584.5 2286 2148.18 0.0205378 1
+ENST00000303450.4 1539 1363.23 0 0
+ENST00000513300.5 2281 2061.52 0 0
+ENST00000282507.7 2383 2156.64 0 0
+ENST00000504685.5 1851 1671.47 0.0527904 2
b
diff -r c56e0689e46e -r 89d26b11d452 test-data/sailfish_ensembl/sailfish_quant.sf6.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sailfish_ensembl/sailfish_quant.sf6.tab Thu Dec 06 15:49:22 2018 -0500
b
@@ -0,0 +1,16 @@
+Name Length EffectiveLength TPM NumReads
+ENST00000303460.4 1957 1747.9 0 0
+ENST00000546378.1 2035 1825.9 0 0
+ENST00000243103.3 847 638.159 0 0
+ENST00000243056.4 2395 2562.99 0.0171188 1
+ENST00000303406.4 2305 2272.12 0.152979 7.92213
+ENST00000430889.2 1665 1572.52 3.54659 127.112
+ENST00000312492.2 1610 1731.28 0.0760279 3
+ENST00000512206.1 1644 1434.9 0 0
+ENST00000243108.4 1680 1673.79 0.104853 4
+ENST00000394331.3 2077 2057.73 0 0
+ENST00000040584.5 2286 2185.1 0.0200793 1
+ENST00000303450.4 1539 1329.9 0 0
+ENST00000513300.5 2281 2047.55 0 0
+ENST00000282507.7 2383 2143.62 0 0
+ENST00000504685.5 1851 1663.05 0.0263825 1