# HG changeset patch
# User mingchen0919
# Date 1514577798 18000
# Node ID 5af86972b4080c5c28bc0bef3a476db937d1dedb
planemo upload
diff -r 000000000000 -r 5af86972b408 rmarkdown_feature_counts.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmarkdown_feature_counts.Rmd Fri Dec 29 15:03:18 2017 -0500
@@ -0,0 +1,104 @@
+---
+title: 'Feature Counts'
+output:
+  html_document:
+    number_sections: true
+    toc: true
+    theme: cosmo
+    highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = opt$echo,  # code echoing follows the 'echo' command line flag
+  error = TRUE
+)
+```
+
+
+# User input
+
+```{r 'user input'}
+opt
+```
+
+# Calculate feature counts
+
+```{r 'step 2'}
+res <- featureCounts(
+  files = strsplit(opt$input_bam_paths, ',')[[1]],  # comma-separated paths
+  # annotation
+  annot.inbuilt = opt$annot_inbuilt,
+  annot.ext = opt$annot_ext,
+  isGTFAnnotationFile = opt$isGTFAnnotationFile,
+  GTF.featureType = opt$gtf_feature_type,
+  GTF.attrType = opt$gtf_attr_type,
+  chrAliases = opt$chr_aliases,
+
+  # level of summarization
+  useMetaFeatures = opt$use_meta_features,
+
+  # overlap between reads and features
+  allowMultiOverlap = opt$allow_multi_overlap,
+  minOverlap = opt$min_overlap,
+  largestOverlap = opt$largest_overlap,
+  readExtension5 = opt$read_extension_5,
+  readExtension3 = opt$read_extension_3,
+  read2pos = opt$read_2_pos,
+
+  # multi-mapping reads
+  countMultiMappingReads = opt$count_multi_mapping_reads,
+  fraction = opt$fraction,
+
+  # read filtering
+  minMQS = opt$min_mqs,
+  splitOnly = opt$split_only,
+  nonSplitOnly = opt$non_split_only,
+  primaryOnly = opt$primary_only,
+  ignoreDup = opt$ignore_dup,
+
+  # strandness
+  strandSpecific = opt$strand_specific,
+
+  # exon-exon junctions
+  juncCounts = opt$junc_counts,
+  genome = opt$genome,
+
+  # parameters specific to paired end reads
+  isPairedEnd = opt$is_paired_end,
+  requireBothEndsMapped = opt$require_both_ends_mapped,
+  checkFragLength = opt$check_frag_length,
+  minFragLength = opt$min_frag_length,
+  maxFragLength = opt$max_frag_length,
+  countChimericFragments = opt$count_chimeric_fragments,
+  autosort = opt$auto_sort,
+
+  # miscellaneous
+  nthreads = opt$n_threads,
+  maxMOp = opt$max_mop,
+  reportReads = opt$report_reads
+)
+```
+
+# Write counts into a text file
+
+```{r}
+colnames(res$counts) <- strsplit(opt$input_bam_names, ',')[[1]]
+# write the count matrix as a delimited text table (write.table defaults)
+write.table(res$counts, file = 'feature_counts.txt')
+```
+
+Display the first 100 rows.
+
+```{r}
+datatable(head(res$counts, 100))
+```
+
+# Save results into RData file
+
+```{r}
+save(res, file = 'feature_counts.RData')
+str(res)
+```
+
+
diff -r 000000000000 -r 5af86972b408 rmarkdown_feature_counts.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmarkdown_feature_counts.xml Fri Dec 29 15:03:18 2017 -0500
@@ -0,0 +1,206 @@
+
+
+ pandoc
+ r-getopt
+ r-rmarkdown
+ r-htmltools
+ r-dplyr
+ r-dt
+ bioconductor-rsubread
+
+ This function assigns mapped sequencing reads to genomic features
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 5af86972b408 rmarkdown_feature_counts_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmarkdown_feature_counts_render.R Fri Dec 29 15:03:18 2017 -0500
@@ -0,0 +1,118 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz <- file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+  ##============== load packages ===============================
+  library(getopt)
+  library(rmarkdown)
+  library(htmltools)
+  library(dplyr)
+  library(Rsubread)
+  library(DT)
+  ##============================================================
+
+  ##---------below is the code for rendering .Rmd templates-----
+
+  ##=============STEP 1: handle command line arguments==========
+  ##
+  ##============================================================
+  # column 1: the long flag name
+  # column 2: the short flag alias. A SINGLE character string
+  # column 3: argument mask
+  #           0: no argument
+  #           1: argument required
+  #           2: argument is optional
+  # column 4: data type to which the flag's argument shall be cast.
+  #           possible values: logical, integer, double, complex, character.
+  #-------------------------------------------------------------
+  #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
+  # 1. short flag alias should match the flag in the command section in the XML file.
+  # 2. long flag name can be any legal R variable name
+  # 3. two names in args_list can share a common string, but one name should not be part of another name.
+  #    for example, if one name is "ECHO" and another is "ECHO_XXX", it will cause problems.
+  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  args_list <- list()
+  ##------- 1. input data ---------------------
+  args_list$e <- c('echo', 'e', '1', 'logical')
+  args_list$a <- c('input_bam_paths', 'a', '1', 'character')
+  args_list$N <- c('input_bam_names', 'N', '1', 'character')
+  args_list$b <- c('annot_inbuilt', 'b', '1', 'character')
+  args_list$c <- c('annot_ext', 'c', '1', 'character')
+  args_list$f <- c('isGTFAnnotationFile', 'f', '1', 'logical')
+  args_list$g <- c('gtf_feature_type', 'g', '1', 'character')
+  args_list$h <- c('gtf_attr_type', 'h', '1', 'character')
+  args_list$i <- c('chr_aliases', 'i', '2', 'character')
+  args_list$j <- c('use_meta_features', 'j', '1', 'logical')
+  args_list$k <- c('allow_multi_overlap', 'k', '1', 'logical')
+  args_list$l <- c('min_overlap', 'l', '1', 'integer')
+  args_list$m <- c('largest_overlap', 'm', '1', 'logical')
+  args_list$n <- c('read_extension_5', 'n', '1', 'integer')
+  args_list$o <- c('read_extension_3', 'o', '1', 'integer')
+  args_list$p <- c('read_2_pos', 'p', '1', 'character')
+  args_list$q <- c('count_multi_mapping_reads', 'q', '1', 'logical')
+  args_list$u <- c('fraction', 'u', '1', 'logical')
+  args_list$v <- c('min_mqs', 'v', '1', 'integer')
+  args_list$w <- c('split_only', 'w', '1', 'logical')
+  args_list$x <- c('non_split_only', 'x', '1', 'logical')
+  args_list$y <- c('primary_only', 'y', '1', 'logical')
+  args_list$z <- c('ignore_dup', 'z', '1', 'logical')
+  args_list$A <- c('strand_specific', 'A', '1', 'integer')
+  args_list$B <- c('junc_counts', 'B', '1', 'logical')
+  args_list$C <- c('genome', 'C', '1', 'character')
+  args_list$D <- c('is_paired_end', 'D', '1', 'logical')
+  args_list$E <- c('require_both_ends_mapped', 'E', '1', 'logical')
+  args_list$F <- c('check_frag_length', 'F', '1', 'logical')
+  args_list$G <- c('min_frag_length', 'G', '1', 'integer')
+  args_list$H <- c('max_frag_length', 'H', '1', 'integer')
+  args_list$I <- c('count_chimeric_fragments', 'I', '1', 'logical')
+  args_list$J <- c('auto_sort', 'J', '1', 'logical')
+  args_list$K <- c('n_threads', 'K', '1', 'integer')
+  args_list$L <- c('max_mop', 'L', '1', 'integer')
+  args_list$M <- c('report_reads', 'M', '1', 'logical')
+
+  ##--------2. output report and outputs --------------
+  args_list$REPORT_HTML <- c('report_html', 'r', '1', 'character')
+  args_list$REPORT_DIR <- c('report_dir', 'd', '1', 'character')
+  args_list$SINK_MESSAGE <- c('sink_message', 's', '1', 'character')
+  ##--------3. .Rmd templates in the tool directory ----------
+  args_list$TOOL_TEMPLATE_RMD <- c('tool_template_rmd', 't', '1', 'character')
+  ##-----------------------------------------------------------
+  # stack the 4-element flag specs into a character matrix, one row per flag
+  opt <- getopt(do.call(rbind, args_list))
+
+  ##=======STEP 2: create report directory (optional)==========
+  ##
+  ##===========================================================
+  # create the directory (and any missing parents) only when it is absent,
+  # so a pre-existing directory does not emit a warning.
+  if (!dir.exists(opt$report_dir)) {
+    dir.create(opt$report_dir, recursive = TRUE)
+  }
+
+  ##=STEP 3: replace placeholders in .Rmd with argument values=
+  ##
+  ##===========================================================
+  #++ need to replace placeholders with args values one by one+
+  # fixed = TRUE: placeholders are literal text, and replacement values
+  # (e.g. paths) must not be interpreted as regex backreferences.
+  template_lines <- readLines(opt$tool_template_rmd)
+  template_lines <- gsub('ECHO', as.character(opt$echo), template_lines,
+                         fixed = TRUE)
+  template_lines <- gsub('REPORT_DIR', opt$report_dir, template_lines,
+                         fixed = TRUE)
+  writeLines(template_lines, con = 'tool_template.Rmd')
+
+  ##=============STEP 4: render .Rmd templates=================
+  ##
+  ##===========================================================
+  render('tool_template.Rmd', output_file = opt$report_html)
+
+  ##--------end of code rendering .Rmd templates----------------
+# Restore the message stream first: it still points at zz, and the
+# final sink() closes zz when it pops the output diversion.
+sink(type = 'message')
+sink()
+##=========== End of sinking output=============================