Mercurial > repos > ieguinoa > tximport
changeset 0:2f5e9c0fe367 draft default tip
"planemo upload for repository https://github.com/ieguinoa/tximport-galaxy-wrapper commit 2bb25471c1320fb1206afa2c4daf536b6d6e275f-dirty"
author | ieguinoa |
---|---|
date | Wed, 09 Oct 2019 15:38:21 -0400 |
parents | |
children | |
files | README.md test-data/Araport11_subset.gff3 test-data/cached_locally/tx2gene.loc test-data/custom_sample.tab test-data/salmon_sample1.tab test-data/salmon_sample2.tab test-data/tx2gene.tab tool-data/tx2gene.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test tximport-galaxy-wrapper.tar tximport.R tximport.xml |
diffstat | 13 files changed, 532 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,2 @@ +## tximport-galaxy-wrapper +Wrapper for the package [tximport](https://bioconductor.org/packages/release/bioc/html/tximport.html)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Araport11_subset.gff3 Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,106 @@ +Chr1 Araport11 gene 3631 5899 . + . ID=AT1G01010;Alias=ANAC001,NAC domain containing protein 1;symbol=NAC001;tid=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 mRNA 3631 5899 . + . ID=AT1G01010.1;Parent=AT1G01010;Name=AT1G01010.1;gene_id=AT1G01010 +Chr1 Araport11 exon 3631 3913 . + . ID=AT1G01010.1:exon:1;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 five_prime_UTR 3631 3759 . + . ID=AT1G01010.1:five_prime_UTR;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 exon 3996 4276 . + . ID=AT1G01010.1:exon:2;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 exon 4486 4605 . + . ID=AT1G01010.1:exon:3;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 exon 4706 5095 . + . ID=AT1G01010.1:exon:4;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 exon 5174 5326 . + . ID=AT1G01010.1:exon:5;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 exon 5439 5899 . + . ID=AT1G01010.1:exon:6;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 five_prime_UTR 3759 5439 . + . ID=AT1G01010.1:five_prime_UTR;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 three_prime_UTR 5631 5899 . + . ID=AT1G01010.1:three_prime_UTR;Parent=AT1G01010.1;Name=AT1G01010;gene_id=AT1G01010 +Chr1 Araport11 gene 6788 9130 . - . ID=AT1G01020;symbol=ARV1;tid=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 mRNA 6788 9130 . - . ID=AT1G01020.5;Parent=AT1G01020;Name=AT1G01020.5;gene_id=AT1G01020 +Chr1 Araport11 exon 6788 7069 . - . ID=AT1G01020.5:exon:1;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6788 6914 . - . ID=AT1G01020.5:five_prime_UTR;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7157 7232 . - . ID=AT1G01020.5:exon:2;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7384 7450 . - . ID=AT1G01020.5:exon:3;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7564 7649 . - . ID=AT1G01020.5:exon:4;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7762 7835 . - . ID=AT1G01020.5:exon:5;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7942 7987 . - . ID=AT1G01020.5:exon:6;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8236 8325 . - . ID=AT1G01020.5:exon:7;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8417 8464 . - . ID=AT1G01020.5:exon:8;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6914 8417 . - . ID=AT1G01020.5:five_prime_UTR;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8420 8464 . - . ID=AT1G01020.5:three_prime_UTR;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8594 9130 . - . ID=AT1G01020.5:exon:9;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8594 9130 . - . ID=AT1G01020.5:three_prime_UTR;Parent=AT1G01020.5;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 mRNA 6788 9130 . - . ID=AT1G01020.4;Parent=AT1G01020;Name=AT1G01020.4;gene_id=AT1G01020 +Chr1 Araport11 exon 6788 7069 . - . ID=AT1G01020.4:exon:1;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6788 6914 . - . ID=AT1G01020.4:five_prime_UTR;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7157 7232 . - . ID=AT1G01020.4:exon:2;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7384 7450 . - . ID=AT1G01020.4:exon:3;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7564 7649 . - . ID=AT1G01020.4:exon:4;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7762 7835 . - . ID=AT1G01020.4:exon:5;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7942 7987 . - . ID=AT1G01020.4:exon:6;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8236 8464 . - . ID=AT1G01020.4:exon:7;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6914 8236 . - . ID=AT1G01020.4:five_prime_UTR;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8443 8464 . - . ID=AT1G01020.4:three_prime_UTR;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8594 9130 . - . ID=AT1G01020.4:exon:8;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8594 9130 . - . ID=AT1G01020.4:three_prime_UTR;Parent=AT1G01020.4;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 mRNA 6788 9130 . - . ID=AT1G01020.3;Parent=AT1G01020;Name=AT1G01020.3;gene_id=AT1G01020 +Chr1 Araport11 exon 6788 7069 . - . ID=AT1G01020.3:exon:1;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6788 6914 . - . ID=AT1G01020.3:five_prime_UTR;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7157 7232 . - . ID=AT1G01020.3:exon:2;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7384 7450 . - . ID=AT1G01020.3:exon:3;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7564 7649 . - . ID=AT1G01020.3:exon:4;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7762 7835 . - . ID=AT1G01020.3:exon:5;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7942 7987 . - . ID=AT1G01020.3:exon:6;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8236 8464 . - . ID=AT1G01020.3:exon:7;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6914 8236 . - . ID=AT1G01020.3:five_prime_UTR;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8443 8464 . - . ID=AT1G01020.3:three_prime_UTR;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8571 9130 . - . ID=AT1G01020.3:exon:8;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8571 9130 . - . ID=AT1G01020.3:three_prime_UTR;Parent=AT1G01020.3;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 mRNA 6788 9130 . - . ID=AT1G01020.1;Parent=AT1G01020;Name=AT1G01020.1;gene_id=AT1G01020 +Chr1 Araport11 exon 6788 7069 . - . ID=AT1G01020.1:exon:1;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6788 6914 . - . ID=AT1G01020.1:five_prime_UTR;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7157 7232 . - . ID=AT1G01020.1:exon:2;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7384 7450 . - . ID=AT1G01020.1:exon:3;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7564 7649 . - . ID=AT1G01020.1:exon:4;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7762 7835 . - . ID=AT1G01020.1:exon:5;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7942 7987 . - . ID=AT1G01020.1:exon:6;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8236 8325 . - . ID=AT1G01020.1:exon:7;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8417 8464 . - . ID=AT1G01020.1:exon:8;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8571 9130 . - . ID=AT1G01020.1:exon:9;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6914 8571 . - . ID=AT1G01020.1:five_prime_UTR;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8667 9130 . - . ID=AT1G01020.1:three_prime_UTR;Parent=AT1G01020.1;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 mRNA 6788 8737 . - . ID=AT1G01020.2;Parent=AT1G01020;Name=AT1G01020.2;gene_id=AT1G01020 +Chr1 Araport11 exon 6788 7069 . - . ID=AT1G01020.2:exon:1;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6788 7069 . - . ID=AT1G01020.2:five_prime_UTR;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7157 7450 . - . ID=AT1G01020.2:exon:2;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 7157 7314 . - . ID=AT1G01020.2:five_prime_UTR;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7564 7649 . - . ID=AT1G01020.2:exon:3;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7762 7835 . - . ID=AT1G01020.2:exon:4;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7942 7987 . - . ID=AT1G01020.2:exon:5;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8236 8325 . - . ID=AT1G01020.2:exon:6;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8417 8464 . - . ID=AT1G01020.2:exon:7;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8571 8737 . - . ID=AT1G01020.2:exon:8;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 7314 8571 . - . ID=AT1G01020.2:five_prime_UTR;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8667 8737 . - . ID=AT1G01020.2:three_prime_UTR;Parent=AT1G01020.2;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 mRNA 6788 8737 . - . ID=AT1G01020.6;Parent=AT1G01020;Name=AT1G01020.6;gene_id=AT1G01020 +Chr1 Araport11 exon 6788 7069 . - . ID=AT1G01020.6:exon:1;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 6788 7069 . - . ID=AT1G01020.6:five_prime_UTR;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7157 7450 . - . ID=AT1G01020.6:exon:2;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 7157 7314 . - . ID=AT1G01020.6:five_prime_UTR;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 7564 7649 . - . ID=AT1G01020.6:exon:3;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8236 8325 . - . ID=AT1G01020.6:exon:4;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8417 8464 . - . ID=AT1G01020.6:exon:5;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 five_prime_UTR 7314 8417 . - . ID=AT1G01020.6:five_prime_UTR;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8420 8464 . - . ID=AT1G01020.6:three_prime_UTR;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 exon 8594 8737 . - . ID=AT1G01020.6:exon:6;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 three_prime_UTR 8594 8737 . - . ID=AT1G01020.6:three_prime_UTR;Parent=AT1G01020.6;Name=AT1G01020;gene_id=AT1G01020 +Chr1 Araport11 gene 11649 13714 . - . ID=AT1G01030;symbol=NGA3;full_name=NGATHA3;tid=AT1G01030.1;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 mRNA 11649 13714 . - . ID=AT1G01030.1;Parent=AT1G01030;Name=AT1G01030.1;gene_id=AT1G01030 +Chr1 Araport11 exon 11649 13173 . - . ID=AT1G01030.1:exon:1;Parent=AT1G01030.1;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 five_prime_UTR 11649 11863 . - . ID=AT1G01030.1:five_prime_UTR;Parent=AT1G01030.1;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 three_prime_UTR 12941 13173 . - . ID=AT1G01030.1:three_prime_UTR;Parent=AT1G01030.1;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 exon 13335 13714 . - . ID=AT1G01030.1:exon:2;Parent=AT1G01030.1;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 three_prime_UTR 13335 13714 . - . ID=AT1G01030.1:three_prime_UTR;Parent=AT1G01030.1;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 mRNA 11649 13714 . - . ID=AT1G01030.2;Parent=AT1G01030;Name=AT1G01030.2;gene_id=AT1G01030 +Chr1 Araport11 exon 11649 12354 . - . ID=AT1G01030.2:exon:1;Parent=AT1G01030.2;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 five_prime_UTR 11649 11863 . - . ID=AT1G01030.2:five_prime_UTR;Parent=AT1G01030.2;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 exon 12424 13173 . - . ID=AT1G01030.2:exon:2;Parent=AT1G01030.2;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 five_prime_UTR 11863 12424 . - . ID=AT1G01030.2:five_prime_UTR;Parent=AT1G01030.2;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 three_prime_UTR 12941 13173 . - . ID=AT1G01030.2:three_prime_UTR;Parent=AT1G01030.2;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 exon 13335 13714 . - . ID=AT1G01030.2:exon:3;Parent=AT1G01030.2;Name=AT1G01030;gene_id=AT1G01030 +Chr1 Araport11 three_prime_UTR 13335 13714 . - . ID=AT1G01030.2:three_prime_UTR;Parent=AT1G01030.2;Name=AT1G01030;gene_id=AT1G01030 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/tx2gene.loc Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,5 @@ +#The tx2gene.loc file has this format: +# +#<unique_build_id> <dbkey> <display_name> <path_to_tx2gene_file> + +Ath_Araport11_subset Ath Arabidopsis thaliana - Araport11 - subset ${__HERE__}/../tx2gene.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/custom_sample.tab Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,14 @@ +Transcript_id_here Here_goes_the_length Extra_useless_column_1 Abundance_goes_here Extra_useless_column_2 Here_goes_the_counts +AT1G01010.1 1688 1487.57 4.6817 1 156 +AT1G01020.3 1420 1219.57 1.68167 2 45.9399 +AT1G01020.4 711 510.567 6.65521 3 76.113 +AT1G01020.1 738 537.567 18.1462 4 218.506 +AT1G01020.5 1179 978.567 0.491433 5 10.7721 +AT1G01020.6 617 416.567 0.607591 6 5.66944 +AT1G01020.2 647 446.567 2.63345e-08 7 2.63425e-07 +AT1G03987.1 272 97.3356 0 8 0 +AT1G01030.2 1456 1255.57 0.519509 9 14.6109 +AT1G01030.1 1905 1704.57 0.298284 10 11.3891 +AT1G01040.1 6276 6075.57 1.56643 11 213.178 +AT1G03993.1 788 587.567 0 12 0 +AT1G01040.2 5877 5676.57 4.30834 13 547.822
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/salmon_sample1.tab Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,14 @@ +Name Length EffectiveLength TPM NumReads +AT1G01010.1 1688 1487.57 4.6817 156 +AT1G01020.3 1420 1219.57 1.68167 45.9399 +AT1G01020.4 711 510.567 6.65521 76.113 +AT1G01020.1 738 537.567 18.1462 218.506 +AT1G01020.5 1179 978.567 0.491433 10.7721 +AT1G01020.6 617 416.567 0.607591 5.66944 +AT1G01020.2 647 446.567 2.63345e-08 2.63425e-07 +AT1G03987.1 272 97.3356 0 0 +AT1G01030.2 1456 1255.57 0.519509 14.6109 +AT1G01030.1 1905 1704.57 0.298284 11.3891 +AT1G01040.1 6276 6075.57 1.56643 213.178 +AT1G03993.1 788 587.567 0 0 +AT1G01040.2 5877 5676.57 4.30834 547.822
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/salmon_sample2.tab Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,14 @@ +Name Length EffectiveLength TPM NumReads +AT1G01010.1 1688 1487.57 4.6817 156 +AT1G01020.3 1420 1219.57 1.68167 45.9399 +AT1G01020.4 711 510.567 6.65521 76.113 +AT1G01020.1 738 537.567 18.1462 218.506 +AT1G01020.5 1179 978.567 0.491433 10.7721 +AT1G01020.6 617 416.567 0.607591 5.66944 +AT1G01020.2 647 446.567 2.63345e-08 2.63425e-07 +AT1G03987.1 272 97.3356 0 0 +AT1G01030.2 1456 1255.57 0.519509 14.6109 +AT1G01030.1 1905 1704.57 0.298284 11.3891 +AT1G01040.1 6276 6075.57 1.56643 213.178 +AT1G03993.1 788 587.567 0 0 +AT1G01040.2 5877 5676.57 4.30834 547.822
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tx2gene.tab Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,14 @@ +Transcript Gene +AT1G01010.1 AT1G01010 +AT1G01020.3 AT1G01020 +AT1G01020.4 AT1G01020 +AT1G01020.1 AT1G01020 +AT1G01020.5 AT1G01020 +AT1G01020.6 AT1G01020 +AT1G01020.2 AT1G01020 +AT1G03987.1 AT1G03987 +AT1G01030.2 AT1G01030 +AT1G01030.1 AT1G01030 +AT1G01040.1 AT1G01040 +AT1G03993.1 AT1G03993 +AT1G01040.2 AT1G01040
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tx2gene.loc.sample Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,1 @@ +#value, dbkey, name, path
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,7 @@ +<?xml version="1.0"?> +<tables> + <table name="tx2gene_table" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/tx2gene.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <table name="tx2gene_table" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/cached_locally/tx2gene.loc" /> + </table> +</tables> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tximport.R Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,119 @@ +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +library("getopt") +#library("tools") +options(stringAsFactors = FALSE, useFancyQuotes = FALSE) +args <- commandArgs(trailingOnly = TRUE) + +# get options, using the spec as defined by the enclosed list. +# we read the options from the default: commandArgs(TRUE). +spec <- matrix(c( + "help", "h", 0, "logical", + "base_dir", "w", 1, "character", + "out_file", "o", 1, "character", + "countsFiles", "n", 1, "character", + "countsFromAbundance", "r", 1, "character", + "format", "v", 1, "character", + "gff_file", "H", 0, "character", + "tx2gene", "f", 0, "character", + "geneIdCol", "l", 0, "character", + "txIdCol" , "p", 1, "character", + "abundanceCol", "i", 0, "character", + "countsCol", "y", 1, "character", + "lengthCol", "x", 1, "character"), + byrow=TRUE, ncol=4) + +opt <- getopt(spec) + + + + +# if help was asked for print a friendly message +# and exit with a non-zero error code +if (!is.null(opt$help)) { + cat(getopt(spec, usage=TRUE)) + q(status=1) +} + +if (is.null(opt$gff_file) & is.null(opt$tx2gene)) { + cat("A GFF/GTF file or a tx2gene table is required\n") + q(status=1) +} + +if (opt$format == 'none'){ #custom format + if (is.null(opt$txIdCol) | is.null(opt$abundanceCol) | is.null(opt$countsCol) | is.null(opt$lengthCol)) { + cat("If you select a custom format for the input files you need to specify the column names\n") + q(status=1) + } +} + +if (is.null(opt$countsFiles)) { + cat("'countsFiles' is required\n") + q(status=1) +} + +## parse counts files +library(rjson) +dat <- fromJSON(opt$countsFiles) +samples_df <- lapply(dat, function(samples) # Loop through each "sample" +{ + # Convert each group to a data frame. + # This assumes you have 6 elements each time + data.frame(matrix(unlist(samples), ncol=2, byrow=T)) +}) +samples_df <- do.call(rbind, samples_df) +colnames(samples_df) <- c("path","id") +rownames(samples_df) <- NULL + +# Prepare char vector with files and sample names +files <- file.path(samples_df[,"path"]) +names(files) <- samples_df[,"id"] +#files +#all(file.exists(files)) + + + +library(tximport) + + + + +### if the input is a gff/gtf file first need to create the tx2gene table +if (!is.null(opt$gff_file)) { + suppressPackageStartupMessages({ + library("GenomicFeatures") + }) + txdb <- makeTxDbFromGFF(opt$gff_file) + k <- keys(txdb, keytype = "TXNAME") + tx2gene <- select(txdb, keys=k, columns="GENEID", keytype="TXNAME") + # Remove 'transcript:' from transcript IDs (when gffFile is a GFF3 from Ensembl and the transcript does not have a Name) + tx2gene$TXNAME <- sub('^transcript:', '', tx2gene$TXNAME) + +} else { + tx2gene <- read.table(opt$tx2gene,header=FALSE) + } + + + +## +if (is.null(opt$geneIdCol)) { ## there is a tx2gene table + if (opt$format == 'none'){ #predefined format + cat("here i am too\n") + txi_out <- tximport(files, type="none",txIdCol=opt$txIdCol,abundanceCol=opt$abundanceCol,countsCol=opt$countsCol,lengthCol=opt$lengthCol,tx2gene=tx2gene,countsFromAbundance=opt$countsFromAbundance) + } else { + txi_out <- tximport(files, type=opt$format, tx2gene=tx2gene,countsFromAbundance=opt$countsFromAbundance) + } +} else { # the gene_ID is a column in the counts table + if (opt$format == 'none'){ #predefined format + txi_out <- tximport(files, type="none",geneIdCol=opt$geneIdCol,txIdCol=opt$txIdCol,abundanceCol=opt$abundanceCol,countsCol=opt$countsCol,lengthCol=opt$lengthCol,tx2gene=tx2gene,countsFromAbundance=opt$countsFromAbundance) + } else { + txi_out <- tximport(files, type=opt$format, geneIdCol=opt$geneIdCol,countsFromAbundance=opt$countsFromAbundance) + } + +} +# write count as table +write.table(txi_out$counts, file=opt$out_file, row.names = TRUE, col.names = TRUE, quote = FALSE, sep = "\t")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tximport.xml Wed Oct 09 15:38:21 2019 -0400 @@ -0,0 +1,228 @@ +<tool name="tximport" id="tximport" version="0.1"> + <description> Summarize transcript-level estimates for gene-level analysis </description> + <requirements> + <requirement type="package">bioconductor-tximport</requirement> + <requirement type="package" version="1.34.1">bioconductor-genomicfeatures</requirement> + <requirement type="package" version="1.20.2">r-getopt</requirement> + <requirement type="package" version="0.2.20">r-rjson</requirement> + </requirements> + + <stdio> + <exit_code range="1:" level="fatal" description="Error code returned" /> + <regex match="is not TRUE" + source="both" + level="fatal" + description="Execution halted." /> + </stdio> + +<command> + <![CDATA[ +#import json +#if $gene_name_source_selector.gene_name_source == 'external_file': + #if $gene_name_source_selector.gff_source_selector.gff_source == 'history': + #if $gene_name_source_selector.gff_source_selector.gff_tx2gene_selector.mapping_file_option == 'gff_gtf': + ln -s '$gene_name_source_selector.gff_source_selector.gff_tx2gene_selector.own_gff' mapping.gff && + #else: + ln -s '$gene_name_source_selector.gff_source_selector.gff_tx2gene_selector.own_tx2gene' mapping.tab && + #end if + #end if +#end if + +Rscript '${__tool_directory__}/tximport.R' + --base_dir $__tool_directory__ + --format $input_source_selector.input_source + #if $input_source_selector.input_source == 'none': + --txIdCol $input_source_selector.tx_id_col + --abundanceCol $input_source_selector.abundance_col + --countsCol $input_source_selector.counts_col + --lengthCol $input_source_selector.length_col + #end if + #if $gene_name_source_selector.gene_name_source == 'gene_name_column_option': + --geneIdCol $gene_name_source_selector.gene_id_col + #else + #if $gene_name_source_selector.gff_source_selector.gff_source == "history": + #if $gene_name_source_selector.gff_source_selector.gff_tx2gene_selector.mapping_file_option == 'tx2gene': + --tx2gene mapping.tab + #else + --gff_file mapping.gff + #end if + #else: + --tx2gene $gene_name_source_selector.gff_source_selector.tx2gene.fields.path + #end if + #end if + + --countsFromAbundance $counts_from_abundance + #set $count_files = list() + #for $file in $counts_file: + #set $filename_to_element_identifiers = {} + $filename_to_element_identifiers.__setitem__('id',str($file.element_identifier)) + $filename_to_element_identifiers.__setitem__('path',str($file)) + $count_files.append(filename_to_element_identifiers) + #end for + #set $samples_dict = {} + $samples_dict.__setitem__('samples',$count_files) + --countsFiles '#echo json.dumps(samples_dict)#' + --out_file '${gene_level_values}' + +]]></command> + + + +<inputs> + <conditional name="input_source_selector"> + <param name="input_source" type ="select" label="Select the source of the quantification file"> + <option value="salmon" selected="True">Salmon</option> + <option value="sailfish">Sailfish</option> + <option value="alevin">Alevin</option> + <option value="kallisto">Kallisto</option> + <option value="rsem">RSEM</option> + <option value="stringtie">Stringtie</option> + <option value="none">Custom format (specify the columns)</option> + </param> + <when value="none"> + <param name="tx_id_col" type="text" label="Name of the txID columns"/> + <param name="abundance_col" type="text" label="Name of the abundance column"/> + <param name="counts_col" type="text" label="Name of the counts column"/> + <param name="length_col" type="text" label="Name of the length column"/> + </when> + <when value="salmon"/> + <when value="sailfish"/> + <when value="alevin"/> + <when value="kallisto"/> + <when value="rsem"/> + <when value="stringtie"/> + </conditional> + <conditional name="gene_name_source_selector" > + <param name="gene_name_source" type="select" label="Is the gene name part of the counts file or will be obtained from an external file?"> + <option value="external_file" selected="True">Use an external file to map transcript to gene names</option> + <option value="gene_name_column_option">Gene name is a column of the input file</option> + </param> + <when value="gene_name_column_option"> + <param name="gene_name_column" type="text" label="Name of the column containing the geneID"/> + </when> + <when value="external_file"> + <conditional name="gff_source_selector"> + <param name="gff_source" type="select" label="Select a GFF from your history or use a built-in file?"> + <option value="built-in" selected="True">Use a built-in file</option> + <option value="history" >Use one from the history</option> + </param> + <when value="built-in"> + <param name="tx2gene" type="select" label="Select an annotation version" help="If the build of your interest is not listed contact your Galaxy admin"> + <options from_data_table="tx2gene_table"> + <filter type="sort_by" column="1"/> + <validator type="no_options" message="No files are available for the selected input dataset"/> + </options> + </param> + </when> + <when value="history"> + <conditional name="gff_tx2gene_selector"> + <param name="mapping_file_option" type="select" label="Will you provide a tx2gene or a GFF/GTF file?"> + <option value="tx2gene" selected="True">TranscriptID to GeneID table</option> + <option value="gff_gtf">GTF/GFF file</option> + </param> + <when value="gff_gtf"> + <param name="own_gff" type="data" format="gff" label="Select your GFF file"/> + </when> + <when value="tx2gene"> + <param name="own_tx2gene" type="data" format="tabular" label="Select your TranscriptID to GeneID table file"/> + </when> + </conditional> + </when> + </conditional> + </when> + </conditional> + <param name="counts_from_abundance" type="select" label="Summarization using the abundance (TPM) values?"> + <option value="no">No</option> + <option value="scaled_TPM">Scaled up to library size</option> + <option value="length_scaled_TPM">Scaled using the avg. transcript legth over samples and then the library size</option> + <option value="dtu_scaled_TPM">Scaled using the median transcript length among isoforms of a gene, and then the library size</option> + </param> + <param name="counts_file" type="data" format="tabular" multiple="true" label="Counts file(s)"/> +</inputs> + + +<outputs> + <data format="tabular" name="gene_level_values" label="Gene level summarization on ${on_string}"/> +</outputs> + + +<tests> + <test> + <param name="input_source" value="salmon"/> + <param name="gene_name_source" value="external_file"/> + <param name="counts_from_abundance" value="no"/> + <param name="gff_source" value="history"/> + <param name="mapping_file_option" value="tx2gene"/> + <param name="own_tx2gene" value="tx2gene.tab"/> + <param name="counts_file" value="salmon_sample2.tab,salmon_sample1.tab" /> + <output name="gene_level_values"> + <assert_contents> + <has_text_matching expression="salmon_sample2.tab\tsalmon_sample1.tab" /> + <has_text_matching expression="AT1G01010\t156\t156" /> + </assert_contents> + </output> + </test> + <test> + <param name="input_source" value="salmon"/> + <param name="gene_name_source" value="external_file"/> + <param name="counts_from_abundance" value="no"/> + <param name="gff_source" value="history"/> + <param name="mapping_file_option" value="gff_gtf"/> + <param name="own_gff" value="Araport11_subset.gff3"/> + <param name="counts_file" value="salmon_sample2.tab,salmon_sample1.tab" /> + <output name="gene_level_values"> + <assert_contents> + <has_text_matching expression="salmon_sample2.tab\tsalmon_sample1.tab" /> + <has_text_matching expression="AT1G01010\t156\t156" /> + </assert_contents> + </output> + </test> + <test> + <param name="input_source" value="salmon"/> + <param name="gene_name_source" value="external_file"/> + <param name="counts_from_abundance" value="no"/> + <param name="gff_source" value="built-in"/> + <param name="tx2gene" value="Ath_Araport11_subset"/> + <param name="counts_file" value="salmon_sample2.tab,salmon_sample1.tab" /> + <output name="gene_level_values"> + <assert_contents> + <has_text_matching expression="salmon_sample2.tab\tsalmon_sample1.tab" /> + <has_text_matching expression="AT1G01010\t156\t156" /> + </assert_contents> + </output> + </test> + <!-- Test input with custom format --> + <test> + <param name="input_source" value="none"/> + <param name="tx_id_col" value="Transcript_id_here"/> + <param name="abundance_col" value="Abundance_goes_here"/> + <param name="counts_col" value="Here_goes_the_counts"/> + <param name="length_col" value="Here_goes_the_length"/> + <param name="counts_from_abundance" value="no"/> + <param name="gff_source" value="built-in"/> + <param name="tx2gene" value="Ath_Araport11_subset"/> + <param name="counts_file" value="custom_sample.tab" /> + <output name="gene_level_values"> + <assert_contents> + <has_text_matching expression="custom_sample.tab" /> + <has_text_matching expression="AT1G01010\t156" /> + </assert_contents> + </output> + + </test> + +</tests> + <help> + +.. class:: infomark + +Current version only works in 'merge' mode: A single table of gene summarizations is generated with one column for each sample file. +Take into account that DEseq2 package in Galaxy requires one table per sample. + </help> + + <citations> + <citation type="doi">doi:10.18129/B9.bioc.tximport</citation> + </citations> + +</tool> +