# HG changeset patch # User iuc # Date 1534355680 14400 # Node ID 32e1bfc6b7b2faf81aebefd178a0c672c9f6a5fa planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 8908da9cdd112ae0943dbf1eccb221e84cd99ca7 diff -r 000000000000 -r 32e1bfc6b7b2 scpipe.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scpipe.R Wed Aug 15 13:54:40 2018 -0400 @@ -0,0 +1,149 @@ +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +suppressPackageStartupMessages({ + library(scPipe) + library(SingleCellExperiment) + library(optparse) + library(readr) + library(ggplot2) + library(plotly) + library(DT) + library(scater) + library(scran) + library(scales) + library(Rtsne) +}) + +option_list <- list( + make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), + make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), + make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), + make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), + make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), + make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), + make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), + make_option(c("-bl1","--bl1"), type="integer", help="Barcode length in Read 1"), + make_option(c("-bs2","--bs2"), type="integer", help="Barcode start in Read 2"), + make_option(c("-bl2","--bl2"), type="integer", help="Barcode length in Read 2"), + make_option(c("-us","--us"), type="integer", help="UMI start in Read 2"), + make_option(c("-ul","--ul"), type="integer", help="UMI length in Read 2"), + make_option(c("-rmlow","--rmlow"), type="logical", help="Remove reads with N in barcode or UMI"), + make_option(c("-rmN","--rmN"), type="logical", help="Remove reads with low quality"), + make_option(c("-minq","--minq"), type="integer", help="Minimum read quality"), + make_option(c("-numbq","--numbq"), type="integer", help="Maximum number of bases below minq"), + make_option(c("-stnd","--stnd"), type="logical", help="Perform strand-specific mapping"), + make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), + make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), + make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), + make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), + make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), + make_option(c("-report","--report"), type="logical", help="HTML report of plots"), + make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), + make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +args = parse_args(parser) + +fa_fn = args$fasta +anno_fn = args$exons +fq_R1 = args$read1 +fq_R2 = args$read2 +read_structure = list( + bs1 = args$bs1, # barcode start position in fq_R1, -1 indicates no barcode + bl1 = args$bl1, # barcode length in fq_R1, 0 since no barcode present + bs2 = args$bs2, # barcode start position in fq_R2 + bl2 = args$bl2, # barcode length in fq_R2 + us = args$us, # UMI start position in fq_R2 + ul = args$ul # UMI length +) + +if (args$us == -1) { + has_umi = FALSE +} else { + has_umi = TRUE +} + +filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) + +# Outputs +out_dir = "." +fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) +combined_fastq = file.path(out_dir, "combined.fastq") +aligned_bam = file.path(out_dir, "aligned.bam") +mapped_bam = file.path(out_dir, "aligned.mapped.bam") + + +print("Trimming barcodes") +sc_trim_barcode(combined_fastq, + fq_R1, + fq_R2, + read_structure=read_structure, + filter_settings=filter_settings) + +print("Building genome index") +Rsubread::buildindex(basename=fasta_index, reference=fa_fn) + +print("Aligning reads to genome") +Rsubread::align(index=fasta_index, + readfile1=combined_fastq, + output_file=aligned_bam, + nthreads=args$nthreads) + +if (!is.null(args$barcodes)) { + barcode_anno=args$barcodes +} else { + print("Detecting barcodes") + # detect 10X barcodes and generate sample_index.csv file + barcode_anno = "sample_index.csv" + sc_detect_bc( + infq=combined_fastq, + outcsv=barcode_anno, + bc_len=read_structure$bl2, + max_reads=args$max_reads, + min_count=args$min_count, + max_mismatch=args$max_mis + ) +} + +print("Assigning reads to exons") +sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) + +print("De-multiplexing data") +sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) + +print("Counting genes") +sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) + +print("Creating SingleCellExperiment object") +sce <- create_sce_by_dir(out_dir) + +if (!is.null(args$report)) { +print("Creating report") +create_report(sample_name=args$samplename, + outdir=out_dir, + r1=fq_R1, + r2=fq_R2, + outfq=combined_fastq, + read_structure=read_structure, + filter_settings=filter_settings, + align_bam=aligned_bam, + genome_index=fasta_index, + map_bam=mapped_bam, + exon_anno=anno_fn, + stnd=args$stnd, + fix_chr=FALSE, + barcode_anno=barcode_anno, + max_mis=args$max_mis, + UMI_cor=args$UMI_cor, + gene_fl=args$gene_fl) +} + +if (!is.null(args$rdata) ) { + save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) +} + +sessionInfo() diff -r 000000000000 -r 32e1bfc6b7b2 scpipe.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scpipe.xml Wed Aug 15 13:54:40 2018 -0400 @@ -0,0 +1,264 @@ + + - preprocessing pipeline for single cell RNA-seq + + bioconductor-scpipe + bioconductor-rsubread + r-knitr + r-rmarkdown + r-readr + r-plotly + r-dt + bioconductor-scater + bioconductor-scran + r-rtsne + + r-ggplot2 + r-optparse + + /dev/null | grep -v -i "WARNING: ")", Rsubread version" $(R --vanilla --slave -e "library(Rsubread); cat(sessionInfo()\$otherPkgs\$Rsubread\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", knitr version" $(R --vanilla --slave -e "library(knitr); cat(sessionInfo()\$otherPkgs\$knitr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmarkdown version" $(R --vanilla --slave -e "library(rmarkdown); cat(sessionInfo()\$otherPkgs\$rmarkdown\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", readr version" $(R --vanilla --slave -e "library(readr); cat(sessionInfo()\$otherPkgs\$readr\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", plotly version" $(R --vanilla --slave -e "library(plotly); cat(sessionInfo()\$otherPkgs\$plotly\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", DT version" $(R --vanilla --slave -e "library(DT); cat(sessionInfo()\$otherPkgs\$DT\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scater version" $(R --vanilla --slave -e "library(scater); cat(sessionInfo()\$otherPkgs\$scater\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", scran version" $(R --vanilla --slave -e "library(scran); cat(sessionInfo()\$otherPkgs\$scran\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtsne version" $(R --vanilla --slave -e "library(Rtsne); cat(sessionInfo()\$otherPkgs\$Rtsne\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", ggplot2 version" $(R --vanilla --slave -e "library(ggplot2); cat(sessionInfo()\$otherPkgs\$ggplot2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]> + gene_count.tsv + +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + +
+
+ + + + + report + + + rscript + + + rdata + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1371/journal.pcbi.1006361 + +
diff -r 000000000000 -r 32e1bfc6b7b2 test-data/CB51_MT19_R1.gz Binary file test-data/CB51_MT19_R1.gz has changed diff -r 000000000000 -r 32e1bfc6b7b2 test-data/CB51_MT19_R2.gz Binary file test-data/CB51_MT19_R2.gz has changed diff -r 000000000000 -r 32e1bfc6b7b2 test-data/mm10_MT19.fa.gz Binary file test-data/mm10_MT19.fa.gz has changed diff -r 000000000000 -r 32e1bfc6b7b2 test-data/mm10_MT19.gff3.gz Binary file test-data/mm10_MT19.gff3.gz has changed diff -r 000000000000 -r 32e1bfc6b7b2 tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Wed Aug 15 13:54:40 2018 -0400 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the genome and transcriptome fasta files +#under the "genome" directory (a directory that contains a directory +#for each build. This file has the format (white space characters are +#TAB characters): +# +# +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +#hg19full.87 hg19 Human (Homo sapiens): hg19 Full Trans v87 /path/to/genome/hg19/hg19full.cdna.all.fa + +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# diff -r 000000000000 -r 32e1bfc6b7b2 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Aug 15 13:54:40 2018 -0400 @@ -0,0 +1,7 @@ + + + + value, dbkey, name, path + +
+
diff -r 000000000000 -r 32e1bfc6b7b2 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed Aug 15 13:54:40 2018 -0400 @@ -0,0 +1,8 @@ + + + + value, dbkey, name, path + +
+
+