Mercurial > repos > iuc > isoformswitchanalyzer
changeset 5:b3f292d9f35d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/isoformswitchanalyzer commit 7b7d9892618706dad95641831db8b9f83deb86e1
line wrap: on
line diff
--- a/IsoformSwitchAnalyzeR.R Tue May 30 08:39:23 2023 +0000 +++ b/IsoformSwitchAnalyzeR.R Fri Jun 02 10:27:16 2023 +0000 @@ -29,10 +29,23 @@ parser$add_argument("--modeSelector") parser$add_argument("--parentDir", required = FALSE, help = "Parent directory") +parser$add_argument("--condition", + action = "append", + required = FALSE, + help = "Conditions") +parser$add_argument("--sampleID", + action = "append", + required = FALSE, + help = "SampleID") +parser$add_argument("--replicate", + action = "append", + required = FALSE, + help = "Replicates") parser$add_argument("--readLength", required = FALSE, type = "integer", help = "Read length (required for stringtie)") +parser$add_argument("--pairedSamples", action = "store_true", required = FALSE, help = "Paired samples") parser$add_argument("--annotation", required = FALSE, help = "Annotation") parser$add_argument("--stringtieAnnotation", required = FALSE, help = "Stringtie annotation") parser$add_argument("--transcriptome", required = FALSE, help = "Transcriptome") @@ -344,15 +357,22 @@ readLength = args$readLength ) + if (!args$pairedSamples) { ### Make design matrix myDesign <- data.frame( - sampleID = colnames(quantificationData$abundance)[-1], - condition = gsub( - "[[:digit:]]+", - "", - colnames(quantificationData$abundance)[-1] - ) - ) + sampleID = args$sampleID, + condition = args$condition) + } else { + myDesign <- data.frame( + sampleID = args$sampleID, + condition = args$condition, + replicate = args$replicate) + } + + comparisons <- as.data.frame(cbind( + condition_1 = myDesign$condition[1], + condition_2 = myDesign$condition[length(myDesign$condition)] + )) if (args$toolSource == "stringtie") { if (!is.null(args$stringtieAnnotation)) { @@ -365,6 +385,7 @@ isoformNtFasta = args$transcriptome, addAnnotatedORFs = FALSE, showProgress = TRUE, + comparisonsToMake = comparisons, fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem ) @@ -383,6 +404,7 @@ isoformNtFasta = args$transcriptome, isoformExonAnnoation = args$annotation, showProgress = TRUE, + comparisonsToMake = comparisons, fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem ) } @@ -395,7 +417,8 @@ removeNonConvensionalChr = args$removeNonConvensionalChr, isoformExonAnnoation = args$annotation, isoformNtFasta = args$transcriptome, - showProgress = TRUE + showProgress = TRUE, + comparisonsToMake = comparisons ) }
--- a/isoformswitchanalyzer.xml Tue May 30 08:39:23 2023 +0000 +++ b/isoformswitchanalyzer.xml Fri Jun 02 10:27:16 2023 +0000 @@ -20,6 +20,9 @@ description="An undefined error occurred, please check your input carefully and contact your administrator." /> </stdio> <command><![CDATA[ + #set $conditions = list() + #set $sampleIDs = list() + #set $replicates = list() #if $functionMode.selector == 'data_import' #if $functionMode.transcriptome.is_of_type("fasta.gz"): ln -s '${functionMode.transcriptome}' './transcriptome.fasta.gz' && @@ -51,21 +54,35 @@ #set $stringtie_annotation = './stringtie_annotation.gtf' #end if #end if + #else if $functionMode.tool_source.selector == 'salmon' + #set $filename = 'quant.sf' #else - #set $filename = 'quant.sf' + #set $filename = 'abundance.tsv' #end if - #for $index in range(len($functionMode.first_factor.trans_counts)): - mkdir './input_files/${functionMode.first_factor.factorLevel}${index}/' && - ln -s $functionMode.first_factor.trans_counts[$index] './input_files/${functionMode.first_factor.factorLevel}${index}/${filename}' && + #for $index in range(len($functionMode.tool_source.first_factor.trans_counts)): + $conditions.append($functionMode.tool_source.first_factor.factorLevel) + $sampleIDs.append(str($functionMode.tool_source.first_factor.factorLevel) + str($index)) + $replicates.append($index) + mkdir './input_files/${functionMode.tool_source.first_factor.factorLevel}${index}/' && + ln -s $functionMode.tool_source.first_factor.trans_counts[$index] './input_files/${functionMode.tool_source.first_factor.factorLevel}${index}/${filename}' && #end for - #for $index in range(len($functionMode.second_factor.trans_counts)): - mkdir './input_files/${functionMode.second_factor.factorLevel}${index}/' && - ln -s $functionMode.second_factor.trans_counts[$index] './input_files/${functionMode.second_factor.factorLevel}${index}/${filename}' && + #for $index in range(len($functionMode.tool_source.second_factor.trans_counts)): + $conditions.append($functionMode.tool_source.second_factor.factorLevel) + $sampleIDs.append(str($functionMode.tool_source.second_factor.factorLevel) + str($index)) + $replicates.append($index) + mkdir './input_files/${functionMode.tool_source.second_factor.factorLevel}${index}/' && + ln -s $functionMode.tool_source.second_factor.trans_counts[$index] './input_files/${functionMode.tool_source.second_factor.factorLevel}${index}/${filename}' && #end for Rscript '${__tool_directory__}/IsoformSwitchAnalyzeR.R' + #for $i, $condition in enumerate($conditions) + --condition $condition + --sampleID $sampleIDs[$i] + --replicate $replicates[$i] + #end for + $functionMode.pairedSamples --modeSelector $functionMode.selector --parentDir './input_files' --annotation $annotation @@ -207,31 +224,20 @@ <option value="second_step">Analysis part two: Plot all isoform switches and their annotation</option> </param> <when value="data_import"> - <section name="first_factor" title="1: Factor level" expanded="true"> - <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor' or 'treated'" - help="Only letters, numbers and underscores will be retained in this field"> - <sanitizer> - <valid initial="string.letters,string.digits"><add value="_" /></valid> - </sanitizer> - </param> - <param name="trans_counts" type="data" format="tabular" multiple="true" label="Transcript-level expression measurements"/> - </section> - <section name="second_factor" title="2: Factor level" expanded="true"> - <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor' or 'treated'" - help="Only letters, numbers and underscores will be retained in this field"> - <sanitizer> - <valid initial="string.letters,string.digits"><add value="_" /></valid> - </sanitizer> - </param> - <param name="trans_counts" type="data" format="tabular" multiple="true" label="Transcript-level expression measurements"/> - </section> <conditional name="tool_source"> <param name="selector" type="select" label="Quantification data source" help="IsoformSwitchAnalyzeR has different functions for importing data from different sources."> <option value="stringtie">StringTie</option> - <option value="salmon">Salmon/Kallisto</option> + <option value="salmon">Salmon</option> + <option value="kallisto">Kallisto</option> </param> - <when value="salmon"/> + <when value="salmon"> + <expand macro="macro_inputs"/> + </when> + <when value="kallisto"> + <expand macro="macro_inputs"/> + </when> <when value="stringtie"> + <expand macro="macro_inputs"/> <param name="averageSize" type="integer" min="0" value="150" label="Average read length" help="Must be the number of base pairs sequenced. e.g. if the data quantified is 75 bp paired ends the the user should supply readLength=75" /> <param argument="fixStringTieAnnotationProblem" type="boolean" truevalue="--fixStringTieAnnotationProblem" falsevalue="" checked="true" @@ -256,13 +262,14 @@ help="Please note this different from a fasta file with the sequences of the entire genome." /> <param argument="removeNonConvensionalChr" type="boolean" truevalue="--removeNonConvensionalChr" falsevalue="" checked="false" label="Remove non-conventional chromosomes" help="These regions are typically used to annotate regions that cannot be associated to a specific region." /> + <param argument="pairedSamples" type="boolean" truevalue="--pairedSamples" falsevalue="" checked="false" label="Paired samples between factors" help="Samples + from different factors belong to the same individual (e.g. samples from same patient from health and cancerous tissues or different parts from the same plant)" /> <param name="countFiles" type="select" label="Generate count matrix files" help="If IsoformSwitchAnalyzeR is used for fixing Stringtie annotation problem, it can generate count files for analyzing differential expression with DESeq2 (when selecting collection) or CEMiTool (when secting the expression matrix format)."> <option value="disabled">Disabled</option> <option value="collection">Collection of count files</option> <option value="matrix">Expression matrix</option> </param> - </when> <!--WRAPPER FIRST STEP SECTION--> @@ -595,23 +602,23 @@ </data> </outputs> <tests> - <!-- Test 01: Data import mode--> + <!-- Test 01: Data import mode--> <test expect_num_outputs="1"> <conditional name="functionMode"> <param name="selector" value="data_import"/> <param name="genomeAnnotation" value="annotation_salmon.gtf.gz"/> <param name="transcriptome" value="transcriptome.fasta.gz"/> <param name="countFiles" value="disabled"/> - <section name="first_factor"> - <param name="factorLevel" value="health"/> - <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/> - </section> - <section name="second_factor"> - <param name="factorLevel" value="cancer"/> - <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/> - </section> <conditional name="tool_source"> <param name="selector" value="salmon"/> + <section name="first_factor"> + <param name="factorLevel" value="health"/> + <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/> + </section> + <section name="second_factor"> + <param name="factorLevel" value="cancer"/> + <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/> + </section> </conditional> </conditional> <output name="switchList" file="test01.RData" ftype="rdata" compare="sim_size" delta="100"/> @@ -623,16 +630,16 @@ <param name="genomeAnnotation" value="annotation_salmon.gtf.gz"/> <param name="transcriptome" value="transcriptome.fasta.gz"/> <param name="countFiles" value="matrix"/> - <section name="first_factor"> - <param name="factorLevel" value="health"/> - <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/> - </section> - <section name="second_factor"> - <param name="factorLevel" value="cancer"/> - <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/> - </section> <conditional name="tool_source"> <param name="selector" value="salmon"/> + <section name="first_factor"> + <param name="factorLevel" value="health"/> + <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/> + </section> + <section name="second_factor"> + <param name="factorLevel" value="cancer"/> + <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/> + </section> </conditional> </conditional> <output name="switchList" ftype="rdata"> @@ -650,16 +657,16 @@ <param name="genomeAnnotation" value="annotation_salmon.gtf.gz"/> <param name="transcriptome" value="transcriptome.fasta.gz"/> <param name="countFiles" value="collection"/> - <section name="first_factor"> - <param name="factorLevel" value="health"/> - <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/> - </section> - <section name="second_factor"> - <param name="factorLevel" value="cancer"/> - <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/> - </section> <conditional name="tool_source"> <param name="selector" value="salmon"/> + <section name="first_factor"> + <param name="factorLevel" value="health"/> + <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/> + </section> + <section name="second_factor"> + <param name="factorLevel" value="cancer"/> + <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/> + </section> </conditional> </conditional> <output name="switchList" ftype="rdata"> @@ -668,10 +675,11 @@ </assert_contents> </output> <output_collection name="collection_counts_factor1" type="list" count="2"> - <element name="cancer0_dataset" file="test03_cancer_counts.tabular" ftype="tabular" lines_diff="6"/> + <element name="health0_dataset" file="test03_health_counts.tabular" ftype="tabular" lines_diff="6"/> + </output_collection> <output_collection name="collection_counts_factor2" type="list" count="2"> - <element name="health0_dataset" file="test03_health_counts.tabular" ftype="tabular" lines_diff="6"/> + <element name="cancer0_dataset" file="test03_cancer_counts.tabular" ftype="tabular" lines_diff="6"/> </output_collection> </test> <!-- Test 04: Extract isoform switches all outputs--> @@ -867,7 +875,7 @@ </output> <output name="isoformFeatures" ftype="tabular"> <assert_contents> - <has_size value="95185" delta="100"/> + <has_size value="94888" delta="100"/> <has_text text="gene_overall_mean"/> </assert_contents> </output> @@ -1039,7 +1047,7 @@ </output> <output name="isoformFeatures" ftype="tabular"> <assert_contents> - <has_size value="99607" delta="50"/> + <has_size value="99310" delta="50"/> <has_text text="gene_overall_mean"/> </assert_contents> </output> @@ -1061,6 +1069,54 @@ </assert_contents> </output> </test> + <!-- Test 09: Kallisto input--> + <test expect_num_outputs="1"> + <conditional name="functionMode"> + <param name="selector" value="data_import"/> + <param name="genomeAnnotation" value="annotation_kallisto.gtf.gz"/> + <param name="transcriptome" value="transcriptome_kallisto.fasta.gz"/> + <param name="countFiles" value="disabled"/> + <conditional name="tool_source"> + <param name="selector" value="kallisto"/> + <section name="first_factor"> + <param name="factorLevel" value="health"/> + <param name="trans_counts" value="kallisto_cond1_rep1.tsv,kallisto_cond1_rep2.tsv"/> + </section> + <section name="second_factor"> + <param name="factorLevel" value="cancer"/> + <param name="trans_counts" value="kallisto_cond2_rep1.tsv,kallisto_cond2_rep2.tsv"/> + </section> + </conditional> + </conditional> + <output name="switchList" file="test09.RData" ftype="rdata" compare="sim_size" delta="100"/> + </test> + <!-- Test 10: Test paired samples in the experimental design--> + <test expect_num_outputs="3"> + <conditional name="functionMode"> + <param name="selector" value="data_import"/> + <param name="genomeAnnotation" value="annotation_salmon.gtf.gz"/> + <param name="transcriptome" value="transcriptome.fasta.gz"/> + <param name="pairedSamples" value="true"/> + <param name="countFiles" value="matrix"/> + <conditional name="tool_source"> + <param name="selector" value="salmon"/> + <section name="first_factor"> + <param name="factorLevel" value="health"/> + <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/> + </section> + <section name="second_factor"> + <param name="factorLevel" value="cancer"/> + <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/> + </section> + </conditional> + </conditional> + <output name="switchList" ftype="rdata"> + <assert_contents> + <has_size value="652170" delta="300"/> + </assert_contents> + </output> + <output name="sample_annotation" file="test10_samples_annotation.tabular" ftype="tabular"/> + </test> </tests> <help><