Mercurial > repos > iuc > edger
changeset 14:070900306913 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 0c79416d9612d0ebdcb7c3c0543d510e11bb0774
| author | iuc | 
|---|---|
| date | Wed, 11 Oct 2023 11:13:07 +0000 | 
| parents | 0cb907a2a810 | 
| children | 5bf899c13979 | 
| files | edger.R edger.xml test-data/contrasts_file.txt | 
| diffstat | 3 files changed, 120 insertions(+), 13 deletions(-) [+] | 
line wrap: on
 line diff
--- a/edger.R Wed Sep 27 19:53:04 2023 +0000 +++ b/edger.R Wed Oct 11 11:13:07 2023 +0000 @@ -8,6 +8,7 @@ # matrixPath", "m", 2, "character" -Path to count matrix # factFile", "f", 2, "character" -Path to factor information file # factInput", "i", 2, "character" -String containing factors if manually input +# formula", "F", 2, "character". -String containing a formula to override default use of factInput # annoPath", "a", 2, "character" -Path to input containing gene annotations # contrastData", "C", 1, "character" -String containing contrasts of interest # cpmReq", "c", 2, "double" -Float specifying cpm requirement @@ -159,6 +160,7 @@ "filesPath", "j", 2, "character", "matrixPath", "m", 2, "character", "factFile", "f", 2, "character", + "formula", "F", 2, "character", "factInput", "i", 2, "character", "annoPath", "a", 2, "character", "contrastData", "C", 1, "character", @@ -312,8 +314,13 @@ out_path <- opt$outPath dir.create(out_path, showWarnings = FALSE) -# Split up contrasts separated by comma into a vector then sanitise -contrast_data <- unlist(strsplit(opt$contrastData, split = ",")) +# Check if contrastData is a file or not +if (file.exists(opt$contrastData)) { + contrast_data <- unlist(read.table(opt$contrastData, sep = "\t", header = TRUE)[[1]]) +} else { + # Split up contrasts separated by comma into a vector then sanitise + contrast_data <- unlist(strsplit(opt$contrastData, split = ",")) +} contrast_data <- sanitise_equation(contrast_data) contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE) @@ -397,10 +404,17 @@ data$genes <- genes - -formula <- "~0" -for (i in seq_along(factor_list)) { - formula <- paste(formula, factor_list[i], sep = "+") +if (!is.null(opt$formula)) { + formula <- opt$formula + # sanitisation can be getting rid of the "~" + if (!startsWith(formula, "~")) { + formula <- paste0("~", formula) + } +} else { + formula <- "~0" + for (i in seq_along(factor_list)) { + formula <- paste(formula, factor_list[i], sep = "+") + } } formula <- formula(formula)
--- a/edger.xml Wed Sep 27 19:53:04 2023 +0000 +++ b/edger.xml Wed Oct 11 11:13:07 2023 +0000 @@ -4,7 +4,7 @@ </description> <macros> <token name="@TOOL_VERSION@">3.36.0</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> </macros> <edam_topics> <edam_topic>topic_3308</edam_topic> @@ -68,7 +68,15 @@ -a '$anno.geneanno' #end if --C '${ ','.join( ['%s' % $x.contrast for x in $rep_contrast] ) }' +#if $formula: + -F '$formula' +#end if + +#if $contrasts.contrastOpt == 'file': + -C '$contrasts.cinfo' +#else: + -C '${ ','.join( ['%s' % $x.contrast for x in $contrasts.rep_contrast] ) }' +#end if #if $f.filt.filt_select == 'yes': #if $f.filt.cformat.format_select == 'cpm': @@ -176,13 +184,45 @@ </when> <when value="no"/> </conditional> + <!-- Optional formula --> + <param name="formula" type="text" optional="true" label="Formula for linear model" help="An optional formula for the EdgeR linear model, this will override the use of the fields in factors as a simple sum. The formula can only use elements available in the factors file. This needs to be exactly as EdgeR expect the formula, ie. `~ 0 + factor_A + factor_B:factor_C`. See EdgeR documentation for more details."> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_"/> + <add value="-"/> + <add value="+"/> + <add value="*"/> + <add value="/"/> + <add value="^"/> + <add value=":"/> + <add value="."/> + <add value="~"/> + <add value=" "/> + <add value="("/> + <add value=")"/> + <add value="@"/> + <add value="$"/> + </valid> + </sanitizer> + </param> <!-- Contrasts --> - <repeat name="rep_contrast" title="Contrast" min="1" default="1"> - <param name="contrast" type="text" label="Contrast of Interest" help="Names of two groups to compare separated by a hyphen e.g. Mut-WT. If the order is Mut-WT the fold changes in the results will be up/down in Mut relative to WT. If you have more than one contrast enter each separately using the Insert Contrast button below. For differences between contrasts use e.g. (MT.t1-MT.t0)-(WT.t1-WT.t0). For more info, see Chapter 8 in the limma User's guide: https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf or https://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf page 36 for nested comparisons."> - <validator type="empty_field"/> - <validator type="regex" message="Please only use letters, numbers, parentheses or underscores">^[\w\-()]+$</validator> + <conditional name="contrasts"> + <param name="contrastOpt" type="select" label="Input contrasts manually or through a file"> + <option value="manual">manually</option> + <option value="file">file</option> </param> - </repeat> + <when value="manual"> + <repeat name="rep_contrast" title="Contrast" min="1" default="1"> + <param name="contrast" type="text" label="Contrast of Interest" help="Names of two groups to compare separated by a hyphen e.g. Mut-WT. If the order is Mut-WT the fold changes in the results will be up/down in Mut relative to WT. If you have more than one contrast enter each separately using the Insert Contrast button below. For differences between contrasts use e.g. (MT.t1-MT.t0)-(WT.t1-WT.t0). For more info, see Chapter 8 in the limma User's guide: https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf or https://bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf page 36 for nested comparisons."> + <validator type="empty_field"/> + <validator type="regex" message="Please only use letters, numbers, parentheses or underscores">^[\w\-()]+$</validator> + </param> + </repeat> + </when> + <when value="file"> + <param name="cinfo" optional="true" type="data" format="tabular" label="Contrasts File" help="Setting this file will ignore any manually added contrasts above, make sure to remove any contrast fields above pressing the trash bin icon, or the tool will fail. First line of the file must be a header, below that each separate contrast should be on a line. Contrast formulas need to be based on ther factors data and potentially the formula provided. See EdgeR documentation on contrasts for more details."/> + </when> + </conditional> <!-- Filter Options --> <section name="f" expanded="false" title="Filter Low Counts"> <conditional name="filt"> @@ -264,6 +304,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -302,6 +343,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="MutA,MutA,MutA,MutB,MutB,MutB,WTA,WTA,WTA,WTB,WTB,WTB"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="(MutA-MutB)-(WTA-WTB)"/> </repeat> @@ -333,6 +375,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -356,6 +399,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -383,6 +427,7 @@ <param name="factorName" value="Batch"/> <param name="groupNames" value="b1,b2,b3,b1,b2,b3"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -402,6 +447,7 @@ <param name="ffile" value="yes"/> <param name="finfo" value="factorinfo.txt"/> <param name="counts" value="matrix.txt"/> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -424,6 +470,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -451,6 +498,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -494,6 +542,7 @@ </repeat> <param name="annoOpt" value="yes"/> <param name="geneanno" value="anno.txt"/> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -530,6 +579,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -565,6 +615,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -599,6 +650,7 @@ <param name="factorName" value="Genotype"/> <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> </repeat> + <param name="contrastOpt" value="manual"/> <repeat name="rep_contrast"> <param name="contrast" value="Mut-WT"/> </repeat> @@ -626,6 +678,36 @@ </element> </output_collection> </test> + <!-- Ensure formula and contrast file work --> + <test expect_num_outputs="2"> + <param name="format" value="matrix"/> + <param name="counts" value="matrix.txt"/> + <repeat name="rep_factor"> + <param name="factorName" value="Genotype"/> + <param name="groupNames" value="Mut,Mut,Mut,WT,WT,WT"/> + </repeat> + <repeat name="rep_factor"> + <param name="factorName" value="Batch"/> + <param name="groupNames" value="b1,b2,b3,b1,b2,b3"/> + </repeat> + <param name="contrastOpt" value="file"/> + <param name="cinfo" value="contrasts_file.txt"/> + <param name="formula" value="~ 0 + Genotype + Batch"/> + <param name="normalisationOption" value="TMM"/> + <output_collection name="outTables" count="2"> + <element name="edgeR_Mut-WT" ftype="tabular"> + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/> + <has_text_matching expression="11304.*0.4584"/> + </assert_contents> + </element> + <element name="edgeR_WT-Mut" ftype="tabular"> + <assert_contents> + <has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/> + </assert_contents> + </element> + </output_collection> + </test> </tests> <help><