comparison msi_filtering.xml @ 9:28ac8199d4d5 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit e87eea03505ab1ba067e192bbbcdc6197dc4b42e
author galaxyp
date Tue, 04 Sep 2018 13:42:37 -0400
parents 262db9893c6f
children
comparison
equal deleted inserted replaced
8:262db9893c6f 9:28ac8199d4d5
1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.6"> 1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.7">
2 <description>tool for filtering mass spectrometry imaging data</description> 2 <description>tool for filtering mass spectrometry imaging data</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> 5 <requirement type="package" version="2.2.1">r-gridextra</requirement>
6 <requirement type="package" version="2.2.1">r-ggplot2</requirement> 6 <requirement type="package" version="2.2.1">r-ggplot2</requirement>
132 ############ Pixels in two columns format: x and y in different columns ############# 132 ############ Pixels in two columns format: x and y in different columns #############
133 133
134 #elif str($pixels_cond.pixel_filtering) == "two_columns": 134 #elif str($pixels_cond.pixel_filtering) == "two_columns":
135 print("two columns") 135 print("two columns")
136 136
137 ## read tabular file, count number of rows (= number of pixels), extract dataframe with x,y,annotation (for QC), count number of valid pixels 137 ## read tabular file
138 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, 138 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE,
139 stringsAsFactors = FALSE) 139 stringsAsFactors = FALSE)
140 startingrow = $pixels_cond.pixel_header+1 140 startingrow = $pixels_cond.pixel_header+1
141 numberpixels = length(startingrow:nrow(input_list)) 141 numberpixels = length(startingrow:nrow(input_list))
142 inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)] 142 inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)]
143
144 ## rewrite into x = 1, y = 1 format and filter msidata, count validpixels
145 pixelvector = character()
146 for (pixel in 1:nrow(inputpixels)){
147 pixelvector[pixel] = paste0("x = ", inputpixels[pixel,1],", ", "y = ", inputpixels[pixel,2])}
148 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
149 msidata = msidata[,pixelsofinterest]
150 validpixels=ncol(msidata)
151
152 ## in case some pixels are left print annotation plot
143 colnames(inputpixels) = c("x", "y", "annotation") 153 colnames(inputpixels) = c("x", "y", "annotation")
144 position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE) 154 position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE)
145 validpixels = nrow(position_df)
146 colnames(position_df)[3] = "annotation" 155 colnames(position_df)[3] = "annotation"
147 position_df\$annotation = factor(position_df\$annotation) 156 position_df\$annotation = factor(position_df\$annotation)
148 157
149 ## for valid pixels: filter file for pixels
150 if (validpixels != 0){
151 pixelvector = character()
152 for (pixel in 1:nrow(position_df)){
153 pixelvector[pixel] = paste0("x = ", position_df[pixel,1],", ", "y = ", position_df[pixel,2])}
154 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector]
155 msidata = msidata[,pixelsofinterest]
156 }else{
157 validpixels=0}
158 158
159 ########### Pixels within x and y minima and maxima are kept ################### 159 ########### Pixels within x and y minima and maxima are kept ###################
160 160
161 #elif str($pixels_cond.pixel_filtering) == "pixel_range": 161 #elif str($pixels_cond.pixel_filtering) == "pixel_range":
162 print("pixel range") 162 print("pixel range")
195 195
196 ####################### Keep m/z from tabular file ######################### 196 ####################### Keep m/z from tabular file #########################
197 197
198 ## feature filtering only when pixels/features/intensities are left 198 ## feature filtering only when pixels/features/intensities are left
199 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) 199 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE)
200
201 200
202 201
203 if (npeaks_before_filtering > 0) 202 if (npeaks_before_filtering > 0)
204 203
205 { 204 {
312 311
313 #end if 312 #end if
314 313
315 ## save msidata as Rfile 314 ## save msidata as Rfile
316 save(msidata, file="$msidata_filtered") 315 save(msidata, file="$msidata_filtered")
317 316 ## Number of empty TICs
317 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE)
318 }else{ 318 }else{
319 print("Inputfile or file filtered for pixels has no intensities > 0") 319 print("Inputfile or file filtered for pixels has no intensities > 0")
320 numberfeatures = NA 320 numberfeatures = NA
321 validmz = NA 321 validmz = NA
322 ## Number of empty TICs
323 TICs2 = NA
322 } 324 }
323 325
324 #################### QC numbers ####################### 326 #################### QC numbers #######################
325 327
326 328
342 ## Spectra multiplied with m/z (potential number of peaks) 344 ## Spectra multiplied with m/z (potential number of peaks)
343 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) 345 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
344 ## Percentage of intensities > 0 346 ## Percentage of intensities > 0
345 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) 347 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2)
346 ## Number of empty TICs 348 ## Number of empty TICs
347 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE)
348 NumemptyTIC2 = sum(TICs2 == 0) 349 NumemptyTIC2 = sum(TICs2 == 0)
349 ## median TIC 350 ## median TIC
350 medint2 = round(median(TICs2), digits=2) 351 medint2 = round(median(TICs2), digits=2)
351 352
352 properties = c("Number of m/z features", 353 properties = c("Number of m/z features",
354 "Number of pixels", 355 "Number of pixels",
355 "Range of x coordinates", 356 "Range of x coordinates",
356 "Range of y coordinates", 357 "Range of y coordinates",
357 "Intensities > 0", 358 "Intensities > 0",
358 "Median TIC per pixel", 359 "Median TIC per pixel",
359 "Number of zero TICs", 360 "Number of empty spectra",
360 "pixel overview", 361 "pixel overview",
361 "feature overview") 362 "feature overview")
362 363
363 before = c(paste0(maxfeatures), 364 before = c(paste0(maxfeatures),
364 paste0(minmz, " - ", maxmz), 365 paste0(minmz, " - ", maxmz),
437 438
438 #if $output_matrix: 439 #if $output_matrix:
439 440
440 spectramatrix = spectra(msidata)[] 441 spectramatrix = spectra(msidata)[]
441 spectramatrix = cbind(mz(msidata),spectramatrix) 442 spectramatrix = cbind(mz(msidata),spectramatrix)
442 newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) 443 newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix)
443 write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") 444 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t")
444 445
445 #end if 446 #end if
446 447
447 }else{ 448 }else{
448 print("Inputfile or filtered file has no intensities > 0") 449 print("Inputfile or filtered file has no intensities > 0")
531 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> 532 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
532 </inputs> 533 </inputs>
533 534
534 <outputs> 535 <outputs>
535 <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/> 536 <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/>
536 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/> 537 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/>
537 <data format="tabular" name="matrixasoutput" label="$infile.display_name filtered_matrix"> 538 <data format="tabular" name="intensity_matrix" label="$infile.display_name filtered_matrix">
538 <filter>output_matrix</filter> 539 <filter>output_matrix</filter>
539 </data> 540 </data>
540 </outputs> 541 </outputs>
541 <tests> 542 <tests>
542 <test expect_num_outputs="2"> 543 <test expect_num_outputs="2">
550 <param name="annotation_column" value="2"/> 551 <param name="annotation_column" value="2"/>
551 <param name="features_filtering" value="features_list"/> 552 <param name="features_filtering" value="features_list"/>
552 <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/> 553 <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/>
553 <param name="feature_column" value="2"/> 554 <param name="feature_column" value="2"/>
554 <param name="feature_header" value="1"/> 555 <param name="feature_header" value="1"/>
555 <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> 556 <output name="QC_overview" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/>
556 <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size"/> 557 <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size"/>
557 </test> 558 </test>
558 <test expect_num_outputs="2"> 559 <test expect_num_outputs="2">
559 <param name="infile" value="" ftype="imzml"> 560 <param name="infile" value="" ftype="imzml">
560 <composite_data value="Example_Continuous.imzML"/> 561 <composite_data value="Example_Continuous.imzML"/>
563 <param name="pixel_filtering" value="pixel_range"/> 564 <param name="pixel_filtering" value="pixel_range"/>
564 <param name="min_x_range" value="10"/> 565 <param name="min_x_range" value="10"/>
565 <param name="max_x_range" value="20"/> 566 <param name="max_x_range" value="20"/>
566 <param name="min_y_range" value="2"/> 567 <param name="min_y_range" value="2"/>
567 <param name="max_y_range" value="2"/> 568 <param name="max_y_range" value="2"/>
568 <output name="filtering_qc" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/> 569 <output name="QC_overview" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/>
569 <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size"/> 570 <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size"/>
570 </test> 571 </test>
571 <test expect_num_outputs="3"> 572 <test expect_num_outputs="3">
572 <param name="infile" value="" ftype="imzml"> 573 <param name="infile" value="" ftype="imzml">
573 <composite_data value="Example_Continuous.imzML"/> 574 <composite_data value="Example_Continuous.imzML"/>
580 <param name="max_y_range" value="2"/> 581 <param name="max_y_range" value="2"/>
581 <param name="features_filtering" value="features_range"/> 582 <param name="features_filtering" value="features_range"/>
582 <param name="min_mz" value="350" /> 583 <param name="min_mz" value="350" />
583 <param name="max_mz" value="500"/> 584 <param name="max_mz" value="500"/>
584 <param name="output_matrix" value="True"/> 585 <param name="output_matrix" value="True"/>
585 <output name="filtering_qc" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/> 586 <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/>
586 <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/> 587 <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/>
587 <output name="matrixasoutput" file="imzml_matrix3.tabular"/> 588 <output name="intensity_matrix" file="imzml_matrix3.tabular"/>
588 </test> 589 </test>
589 <test expect_num_outputs="2"> 590 <test expect_num_outputs="2">
590 <param name="infile" value="" ftype="imzml"> 591 <param name="infile" value="" ftype="imzml">
591 <composite_data value="Example_Continuous.imzML"/> 592 <composite_data value="Example_Continuous.imzML"/>
592 <composite_data value="Example_Continuous.ibd"/> 593 <composite_data value="Example_Continuous.ibd"/>
594 <param name="pixel_filtering" value="two_columns"/> 595 <param name="pixel_filtering" value="two_columns"/>
595 <param name="two_columns_pixel" ftype="tabular" value = "inputpixels_2column.tabular"/> 596 <param name="two_columns_pixel" ftype="tabular" value = "inputpixels_2column.tabular"/>
596 <param name="pixel_column_x" value="1"/> 597 <param name="pixel_column_x" value="1"/>
597 <param name="pixel_column_y" value="3"/> 598 <param name="pixel_column_y" value="3"/>
598 <param name="annotation_column_xy" value="2"/> 599 <param name="annotation_column_xy" value="2"/>
599 <output name="filtering_qc" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/> 600 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/>
600 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/> 601 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/>
601 </test> 602 </test>
602 <test expect_num_outputs="2"> 603 <test expect_num_outputs="2">
603 <param name="infile" value="" ftype="imzml"> 604 <param name="infile" value="" ftype="imzml">
604 <composite_data value="Example_Continuous.imzML"/> 605 <composite_data value="Example_Continuous.imzML"/>
611 <param name="max_y_range" value="20"/> 612 <param name="max_y_range" value="20"/>
612 <param name="features_filtering" value="features_list"/> 613 <param name="features_filtering" value="features_list"/>
613 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/> 614 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/>
614 <param name="feature_column" value="1"/> 615 <param name="feature_column" value="1"/>
615 <param name="feature_header" value="0"/> 616 <param name="feature_header" value="0"/>
616 <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> 617 <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/>
617 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> 618 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" />
618 </test> 619 </test>
619 <test expect_num_outputs="3"> 620 <test expect_num_outputs="3">
620 <param name="infile" value="" ftype="analyze75"> 621 <param name="infile" value="" ftype="analyze75">
621 <composite_data value="Analyze75.hdr"/> 622 <composite_data value="Analyze75.hdr"/>
627 <param name="pixel_column" value="1"/> 628 <param name="pixel_column" value="1"/>
628 <param name="features_filtering" value="features_list"/> 629 <param name="features_filtering" value="features_list"/>
629 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/> 630 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/>
630 <param name="feature_column" value="1"/> 631 <param name="feature_column" value="1"/>
631 <param name="output_matrix" value="True"/> 632 <param name="output_matrix" value="True"/>
632 <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> 633 <output name="QC_overview" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/>
633 <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" /> 634 <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" />
634 <output name="matrixasoutput" file="analyze_matrix.tabular"/> 635 <output name="intensity_matrix" file="analyze_matrix.tabular"/>
635 </test> 636 </test>
636 <test expect_num_outputs="2"> 637 <test expect_num_outputs="2">
637 <param name="infile" value="" ftype="analyze75"> 638 <param name="infile" value="" ftype="analyze75">
638 <composite_data value="Analyze75.hdr"/> 639 <composite_data value="Analyze75.hdr"/>
639 <composite_data value="Analyze75.img"/> 640 <composite_data value="Analyze75.img"/>
640 <composite_data value="Analyze75.t2m"/> 641 <composite_data value="Analyze75.t2m"/>
641 </param> 642 </param>
642 <output name="filtering_qc" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/> 643 <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/>
643 <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" /> 644 <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" />
644 </test> 645 </test>
645 <test expect_num_outputs="3"> 646 <test expect_num_outputs="3">
646 <param name="infile" value="preprocessed.RData" ftype="rdata"/> 647 <param name="infile" value="preprocessed.RData" ftype="rdata"/>
647 <conditional name="outputs"> 648 <conditional name="outputs">
648 <param name="outputs_select" value="no_quality_control"/> 649 <param name="outputs_select" value="no_quality_control"/>
649 </conditional> 650 </conditional>
650 <param name="output_matrix" value="True"/> 651 <param name="output_matrix" value="True"/>
651 <output name="matrixasoutput" file="rdata_matrix.tabular"/> 652 <output name="intensity_matrix" file="rdata_matrix.tabular"/>
652 <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" /> 653 <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" />
653 <output name="filtering_qc" file="rdata_notfiltered.pdf" compare="sim_size" /> 654 <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" />
654 </test> 655 </test>
655 </tests> 656 </tests>
656 <help> 657 <help>
657 <![CDATA[ 658 <![CDATA[
658 659
681 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) 682 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns)
682 683
683 684
684 Tip: 685 Tip:
685 686
686 - It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If you have m/z from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature m/z from dataset A to filter dataset B. 687 - It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool "Join two files on column allowing a small difference" should be used to find corresponding m/z values, which can then be used for filtering.
687 688
688 689
689 ]]> 690 ]]>
690 </help> 691 </help>
691 <citations> 692 <citations>