| Previous changeset 1:fa62657e9b57 (2017-04-26) Next changeset 3:896cdffe06ff (2017-04-26) |
|
Commit message:
Deleted selected files |
|
removed:
ballgown.R ballgown.xml |
| b |
| diff -r fa62657e9b57 -r eb1206832359 ballgown.R --- a/ballgown.R Wed Apr 26 08:29:56 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,73 +0,0 @@ -#!/usr/bin/Rscript - -# Enabling commands line arguments. Using optparse which allows to use options. -# ---------------------------------------------------------------------------------------- - -suppressMessages(library(optparse, warn.conflicts = FALSE)) -opt_list=list( -make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"), -make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"), -make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the expirements", metavar="character"), -make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the expirements", metavar="character"), -make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"), -make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character") -) -opt_parser=OptionParser(option_list=opt_list) -opt=parse_args(opt_parser) - -# Loading required libraries. suppressMessages() remove all noisy attachement messages -# ---------------------------------------------------------------------------------------- - -suppressMessages(library(ballgown, warn.conflicts = FALSE)) -suppressMessages(library(genefilter, warn.conflicts = FALSE)) -suppressMessages(library(dplyr, warn.conflicts = FALSE)) - -# Setup for the tool with some bases variables. -# ---------------------------------------------------------------------------------------- - - -filtstr = opt$texpression -pdat = 2 -phendata = read.csv(opt$phendat) -setwd(opt$dir) - -# Checking if the pdata file has the right samples names. -# ---------------------------------------------------------------------------------------- - -if (all(phendata$ids == list.files(".")) != TRUE) -{ - stop("Your phenotype data table does not match the samples names. ") -} - -# Creation of the ballgown object based on data -# ---------------------------------------------------------------------------------------- -bgi = ballgown(dataDir= "." , samplePattern="", pData = phendata, verbose = FALSE) - -# Filter the genes with an expression superior to the input filter -# ---------------------------------------------------------------------------------------- -bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE) - -# Creating the variables containing the transcripts and the genes and sorting them through the arrange() command. -# Checking if there's one or more adjust variables in the phenotype data file -# ---------------------------------------------------------------------------------------- - -if (ncol(pData(bgi))<=3) { - results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") - results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") -} else { - results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") - results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") -} - -results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts) -results_transcripts = arrange(results_transcripts,pval) -results_genes = arrange(results_genes,pval) - -# Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue -#This part also output the data of the ballgown object created in the process and save it in a R data file -# ---------------------------------------------------------------------------------------- -write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE) -write.csv(results_genes, opt$outputgenes, row.names = FALSE) -if (opt$bgout != ""){ - save(bgi, file=opt$bgout) -} |
| b |
| diff -r fa62657e9b57 -r eb1206832359 ballgown.xml --- a/ballgown.xml Wed Apr 26 08:29:56 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,235 +0,0 @@\n-<tool id="ballgown" name="Ballgown" version="0.5.0" workflow_compatible="true">\n- <description>Flexible, isoform-level differential expression analysis</description>\n- <requirements>\n- <requirement type="package" version="2.2.0">bioconductor-ballgown</requirement>\n- <requirement type="package" version="0.5.0">r-dplyr</requirement>\n- <requirement type="package" version="1.3.2">r-optparse</requirement>\n-\n- </requirements>\n- <command interpreter="Rscript" detect_errors="aggressive">\n-\t\t##------------------------------------------------------------------------------------\n-\t\t## This function reads the input file with the mapping between samples and files\n-\t\t## E.g. of result:\n-\t\t## mapping = {\n-\t\t## "e2t.ctab" : "sample1",\n-\t\t## "other.ctab" : "sample2",\n-\t\t## "i2t.ctab" : "sample1",\n-\t\t## "t_data.ctab": "sample1"\n-\t\t## ...\n-\t\t## }\n-\t\t##------------------------------------------------------------------------------------\n-\t\t#def read_sample_mapping_file(sample_mapping_file):\n-\t\t\t#try\n-\t\t\t\t#set mapping = {}\n-\t\t\t\t#set file = open($sample_mapping_file.dataset.dataset.get_file_name(),\'r\')\n-\t\t\t\t#for $line in $file:\n-\t\t\t\t\t#set content= $line.strip().split(\'\\t\')\n-\t\t\t\t\t#for $map in $content:\n-\t\t\t\t\t\t#set mapping[$map]= $content[0]\n-\t\t\t\t\t#end for\n-\t\t\t\t#end for\n-\t\t\t\t#return $mapping\n-\t\t\t#except\n-\t\t\t\t#return None\n-\t\t\t#end try\n-\t\t#end def\n-\n-\t\t##------------------------------------------------------------------------------------\n-\t\t## This function returns the name of the sample associated to a given file\n-\t\t##------------------------------------------------------------------------------------\n-\t\t#def get_sample_name($dataset, $sample_mapping):\n-\t\t\t##If the file with samples mapping was provided\n-\t\t\t#if $sample_mapping != None:\n-\t\t\t\t#return $sample_mapping.get($dataset.name, None)\n-\t\t\t##Otherwise with extract the sample name from the filename\n-\t\t\t#else:\n-\t\t\t\t#return str($dataset.element_identifier)\n-\t\t\t#end if\n-\t\t#end def\n-\n-\t\t##------------------------------------------------------------------------------------\n-\t\t## This function reads a dataset or list of datasets and sets the corresponding value\n-\t\t## in the $result variable\n-\t\t## e.g. of result\n-\t\t##\'sample1\' : {\n-\t\t## \'e_data\': \'/export/galaxy-central/database/files/000/dataset_13.dat\'\n-\t\t## \'i_data\': \'/export/galaxy-central/database/files/000/dataset_10.dat\',\n-\t\t## \'t_data\': \'/export/galaxy-central/database/files/000/dataset_12.dat\',\n-\t\t## \'e2t\': \'/export/galaxy-central/database/files/000/dataset_9.dat\',\n-\t\t## \'i2t\': \'/export/galaxy-central/database/files/000/dataset_11.dat\'\n-\t\t## },\n-\t\t##------------------------------------------------------------------------------------\n-\t\t#def read_input_files($param_name, $param_value, $result, $sample_mapping, $create_if_empty):\n-\t\t\t## If input is a data collection\n-\t\t\t#if isinstance($param_value, list):\n-\t\t\t\t## For each dataset\n-\t\t\t\t#for $dataset in $param_value:\n-\t\t\t\t\t## Get the sample name\n-\t\t\t\t\t#set sample_name = $get_sample_name($dataset, $sample_mapping)\n-\t\t\t\t\t## Check if sample is already registered\n-\t\t\t\t\t#if not($result.has_key($sample_name)):\n-\t\t\t\t\t\t#if ($create_if_empty == True):\n-\t\t\t\t\t\t\t#set result[$sample_name] = {}\n-\t\t\t\t\t\t#else:\n-\t\t\t\t\t\t\t#raise ValueError("Error in input. Please check that input contains all the required files for sample " + $sample_name)\n-\t\t\t\t\t\t#end if\n-\t\t\t\t\t#end if\n-\t\t\t\t\t## Register the file to the sample\n-\t\t\t\t\t#set result[$sample_name][$param_name] = str($dataset.dataset.dataset.get_file_name())\n-\t\t\t\t#end for\n-\t\t\t#else:\n-\t\t\t\t#if not($result.has_key("sample_1")):\n-\t\t\t\t\t#set result["sample_1"] = {}\n-\t\t\t\t#end if\n-\t\t\t\t#set result["sample_1"][$param_name] = str($param_name.dataset.dataset.get_file_name())\n-\t\t\t#end if\n-\t\t\t#return $result\n-\t\t#end def\n-\n-\t\t##------------------------------------------------------------------------------------\n-\t\t## Main body of the tool\n-\t\t##---------------------------------------------------'..b" * mcov: multi-map-corrected average per-base read coverage\n- * mcov_sd: standard deviation of multi-map-corrected per-base coverage\n-- **i_data**: intron- (i.e., junction-) level expression measurements. Tab file or collection of tab files. One row per intron. Columns are i_id (numeric intron id), chr, strand, start, end (genomic location of the intron), and the following expression measurements for each sample:\n- * rcount: number of reads supporting the intron\n- * ucount: number of uniquely mapped reads supporting the intron\n- * mrcount: multi-map-corrected number of reads supporting the intron\n-- **t_data**: transcript-level expression measurements. Tab file or collection of tab files. One row per transcript. Columns are:\n- * t_id: numeric transcript id\n- * chr, strand, start, end: genomic location of the transcript\n- * t_name: Cufflinks-generated transcript id\n- * num_exons: number of exons comprising the transcript\n- * length: transcript length, including both exons and introns\n- * gene_id: gene the transcript belongs to\n- * gene_name: HUGO gene name for the transcript, if known\n- * cov: per-base coverage for the transcript (available for each sample)\n- * FPKM: Cufflinks-estimated FPKM for the transcript (available for each sample)\n-- **e2t**: Tab file or collection of tab files. Table with two columns, e_id and t_id, denoting which exons belong to which transcripts. These ids match the ids in the e_data and t_data tables.\n-- **i2t**: Tab file or collection of tab files. Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. These ids match the ids in the i_data and t_data tables.\n-- samples_names: (optional) Tab file. Table with five columns, one row per sample. Defines which files from the input belong to each sample in the experiment.\n-\n-.. class:: infomark\n-\n-'''TIP''' *Note* Here's an example of a good phenotype data file for your expirement.\n-\n-+--------------+-------------------------+-------------------------+---+\n-|ids |experimental variable 1 |experimental variable 2 |...|\n-+==============+=========================+=========================+===+\n-|sample 1 |value 1 |value 2 |...|\n-+--------------+-------------------------+-------------------------+---+\n-|sample 2 |value 2 |value 1 |...|\n-+--------------+-------------------------+-------------------------+---+\n-|sample 3 |value 1 |value 2 |...|\n-+--------------+-------------------------+-------------------------+---+\n-|sample 4 |value 2 |value 1 |...|\n-+--------------+-------------------------+-------------------------+---+\n-|... |value 1 |value 2 |...|\n-+--------------+-------------------------+-------------------------+---+\n-\n-\n-.. class:: infomark\n-\n-*Note* The minimal transcript expression is a number used to filter the transcripts that\n-are less or not expressed in our samples when compared to the genome\n-\n------------------------\n-**Outputs**\n------------------------\n-\n-This tool has 3 outputs:\n-\n-- **transcripts expression** : this is a csv file containing all the transcripts that are expressed above the transcripts expression value\n-- **genes expression** : this is a csv file containing all the genes that are expressed above the transcripts expression value\n-- **Ballgown object** : this is the ballgown object created during the process. This file can be re-used later for further analysis in a R console.\n-\n-----\n-\n-**Authors**: Th\xc3\xa9o Collard [SLU Global Bioinformatics Centre], Rafael Hern\xc3\xa1ndez de Diego [SLU Global Bioinformatics Centre], and Tomas Klingstr\xc3\xb6m [SLU Global Bioinformatics Centre]\n-\n-Sources are available at https://github.com/CollardT/Ballgown-Wrapper\n-\n- </help>\n-</tool>\n" |