Mercurial > repos > galaxyp > msi_filtering
comparison msi_filtering.xml @ 9:28ac8199d4d5 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit e87eea03505ab1ba067e192bbbcdc6197dc4b42e
author | galaxyp |
---|---|
date | Tue, 04 Sep 2018 13:42:37 -0400 |
parents | 262db9893c6f |
children |
comparison legend: equal | deleted | inserted | replaced
8:262db9893c6f | 9:28ac8199d4d5 |
---|---|
1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.6"> | 1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.7"> |
2 <description>tool for filtering mass spectrometry imaging data</description> | 2 <description>tool for filtering mass spectrometry imaging data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> | 4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> |
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> | 5 <requirement type="package" version="2.2.1">r-gridextra</requirement> |
6 <requirement type="package" version="2.2.1">r-ggplot2</requirement> | 6 <requirement type="package" version="2.2.1">r-ggplot2</requirement> |
132 ############ Pixels in two columns format: x and y in different columns ############# | 132 ############ Pixels in two columns format: x and y in different columns ############# |
133 | 133 |
134 #elif str($pixels_cond.pixel_filtering) == "two_columns": | 134 #elif str($pixels_cond.pixel_filtering) == "two_columns": |
135 print("two columns") | 135 print("two columns") |
136 | 136 |
137 ## read tabular file, count number of rows (= number of pixels), extract dataframe with x,y,annotation (for QC), count number of valid pixels | 137 ## read tabular file |
138 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, | 138 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, |
139 stringsAsFactors = FALSE) | 139 stringsAsFactors = FALSE) |
140 startingrow = $pixels_cond.pixel_header+1 | 140 startingrow = $pixels_cond.pixel_header+1 |
141 numberpixels = length(startingrow:nrow(input_list)) | 141 numberpixels = length(startingrow:nrow(input_list)) |
142 inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)] | 142 inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)] |
143 | |
144 ## rewrite into x = 1, y = 1 format and filter msidata, count validpixels | |
145 pixelvector = character() | |
146 for (pixel in 1:nrow(inputpixels)){ | |
147 pixelvector[pixel] = paste0("x = ", inputpixels[pixel,1],", ", "y = ", inputpixels[pixel,2])} | |
148 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] | |
149 msidata = msidata[,pixelsofinterest] | |
150 validpixels=ncol(msidata) | |
151 | |
152 ## in case some pixels are left print annotation plot | |
143 colnames(inputpixels) = c("x", "y", "annotation") | 153 colnames(inputpixels) = c("x", "y", "annotation") |
144 position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE) | 154 position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE) |
145 validpixels = nrow(position_df) | |
146 colnames(position_df)[3] = "annotation" | 155 colnames(position_df)[3] = "annotation" |
147 position_df\$annotation = factor(position_df\$annotation) | 156 position_df\$annotation = factor(position_df\$annotation) |
148 | 157 |
149 ## for valid pixels: filter file for pixels | |
150 if (validpixels != 0){ | |
151 pixelvector = character() | |
152 for (pixel in 1:nrow(position_df)){ | |
153 pixelvector[pixel] = paste0("x = ", position_df[pixel,1],", ", "y = ", position_df[pixel,2])} | |
154 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] | |
155 msidata = msidata[,pixelsofinterest] | |
156 }else{ | |
157 validpixels=0} | |
158 | 158 |
159 ########### Pixels within x and y minima and maxima are kept ################### | 159 ########### Pixels within x and y minima and maxima are kept ################### |
160 | 160 |
161 #elif str($pixels_cond.pixel_filtering) == "pixel_range": | 161 #elif str($pixels_cond.pixel_filtering) == "pixel_range": |
162 print("pixel range") | 162 print("pixel range") |
195 | 195 |
196 ####################### Keep m/z from tabular file ######################### | 196 ####################### Keep m/z from tabular file ######################### |
197 | 197 |
198 ## feature filtering only when pixels/features/intensities are left | 198 ## feature filtering only when pixels/features/intensities are left |
199 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) | 199 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) |
200 | |
201 | 200 |
202 | 201 |
203 if (npeaks_before_filtering > 0) | 202 if (npeaks_before_filtering > 0) |
204 | 203 |
205 { | 204 { |
312 | 311 |
313 #end if | 312 #end if |
314 | 313 |
315 ## save msidata as Rfile | 314 ## save msidata as Rfile |
316 save(msidata, file="$msidata_filtered") | 315 save(msidata, file="$msidata_filtered") |
317 | 316 ## Number of empty TICs |
317 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) | |
318 }else{ | 318 }else{ |
319 print("Inputfile or file filtered for pixels has no intensities > 0") | 319 print("Inputfile or file filtered for pixels has no intensities > 0") |
320 numberfeatures = NA | 320 numberfeatures = NA |
321 validmz = NA | 321 validmz = NA |
322 ## Number of empty TICs | |
323 TICs2 = NA | |
322 } | 324 } |
323 | 325 |
324 #################### QC numbers ####################### | 326 #################### QC numbers ####################### |
325 | 327 |
326 | 328 |
342 ## Spectra multiplied with m/z (potential number of peaks) | 344 ## Spectra multiplied with m/z (potential number of peaks) |
343 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | 345 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) |
344 ## Percentage of intensities > 0 | 346 ## Percentage of intensities > 0 |
345 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) | 347 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) |
346 ## Number of empty TICs | 348 ## Number of empty TICs |
347 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) | |
348 NumemptyTIC2 = sum(TICs2 == 0) | 349 NumemptyTIC2 = sum(TICs2 == 0) |
349 ## median TIC | 350 ## median TIC |
350 medint2 = round(median(TICs2), digits=2) | 351 medint2 = round(median(TICs2), digits=2) |
351 | 352 |
352 properties = c("Number of m/z features", | 353 properties = c("Number of m/z features", |
354 "Number of pixels", | 355 "Number of pixels", |
355 "Range of x coordinates", | 356 "Range of x coordinates", |
356 "Range of y coordinates", | 357 "Range of y coordinates", |
357 "Intensities > 0", | 358 "Intensities > 0", |
358 "Median TIC per pixel", | 359 "Median TIC per pixel", |
359 "Number of zero TICs", | 360 "Number of empty spectra", |
360 "pixel overview", | 361 "pixel overview", |
361 "feature overview") | 362 "feature overview") |
362 | 363 |
363 before = c(paste0(maxfeatures), | 364 before = c(paste0(maxfeatures), |
364 paste0(minmz, " - ", maxmz), | 365 paste0(minmz, " - ", maxmz), |
437 | 438 |
438 #if $output_matrix: | 439 #if $output_matrix: |
439 | 440 |
440 spectramatrix = spectra(msidata)[] | 441 spectramatrix = spectra(msidata)[] |
441 spectramatrix = cbind(mz(msidata),spectramatrix) | 442 spectramatrix = cbind(mz(msidata),spectramatrix) |
442 newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) | 443 newmatrix = rbind(c("mz", names(pixels(msidata))), spectramatrix) |
443 write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | 444 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
444 | 445 |
445 #end if | 446 #end if |
446 | 447 |
447 }else{ | 448 }else{ |
448 print("Inputfile or filtered file has no intensities > 0") | 449 print("Inputfile or filtered file has no intensities > 0") |
531 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> | 532 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> |
532 </inputs> | 533 </inputs> |
533 | 534 |
534 <outputs> | 535 <outputs> |
535 <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/> | 536 <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/> |
536 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/> | 537 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/> |
537 <data format="tabular" name="matrixasoutput" label="$infile.display_name filtered_matrix"> | 538 <data format="tabular" name="intensity_matrix" label="$infile.display_name filtered_matrix"> |
538 <filter>output_matrix</filter> | 539 <filter>output_matrix</filter> |
539 </data> | 540 </data> |
540 </outputs> | 541 </outputs> |
541 <tests> | 542 <tests> |
542 <test expect_num_outputs="2"> | 543 <test expect_num_outputs="2"> |
550 <param name="annotation_column" value="2"/> | 551 <param name="annotation_column" value="2"/> |
551 <param name="features_filtering" value="features_list"/> | 552 <param name="features_filtering" value="features_list"/> |
552 <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/> | 553 <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/> |
553 <param name="feature_column" value="2"/> | 554 <param name="feature_column" value="2"/> |
554 <param name="feature_header" value="1"/> | 555 <param name="feature_header" value="1"/> |
555 <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> | 556 <output name="QC_overview" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> |
556 <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size"/> | 557 <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size"/> |
557 </test> | 558 </test> |
558 <test expect_num_outputs="2"> | 559 <test expect_num_outputs="2"> |
559 <param name="infile" value="" ftype="imzml"> | 560 <param name="infile" value="" ftype="imzml"> |
560 <composite_data value="Example_Continuous.imzML"/> | 561 <composite_data value="Example_Continuous.imzML"/> |
563 <param name="pixel_filtering" value="pixel_range"/> | 564 <param name="pixel_filtering" value="pixel_range"/> |
564 <param name="min_x_range" value="10"/> | 565 <param name="min_x_range" value="10"/> |
565 <param name="max_x_range" value="20"/> | 566 <param name="max_x_range" value="20"/> |
566 <param name="min_y_range" value="2"/> | 567 <param name="min_y_range" value="2"/> |
567 <param name="max_y_range" value="2"/> | 568 <param name="max_y_range" value="2"/> |
568 <output name="filtering_qc" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/> | 569 <output name="QC_overview" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/> |
569 <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size"/> | 570 <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size"/> |
570 </test> | 571 </test> |
571 <test expect_num_outputs="3"> | 572 <test expect_num_outputs="3"> |
572 <param name="infile" value="" ftype="imzml"> | 573 <param name="infile" value="" ftype="imzml"> |
573 <composite_data value="Example_Continuous.imzML"/> | 574 <composite_data value="Example_Continuous.imzML"/> |
580 <param name="max_y_range" value="2"/> | 581 <param name="max_y_range" value="2"/> |
581 <param name="features_filtering" value="features_range"/> | 582 <param name="features_filtering" value="features_range"/> |
582 <param name="min_mz" value="350" /> | 583 <param name="min_mz" value="350" /> |
583 <param name="max_mz" value="500"/> | 584 <param name="max_mz" value="500"/> |
584 <param name="output_matrix" value="True"/> | 585 <param name="output_matrix" value="True"/> |
585 <output name="filtering_qc" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/> | 586 <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/> |
586 <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/> | 587 <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size"/> |
587 <output name="matrixasoutput" file="imzml_matrix3.tabular"/> | 588 <output name="intensity_matrix" file="imzml_matrix3.tabular"/> |
588 </test> | 589 </test> |
589 <test expect_num_outputs="2"> | 590 <test expect_num_outputs="2"> |
590 <param name="infile" value="" ftype="imzml"> | 591 <param name="infile" value="" ftype="imzml"> |
591 <composite_data value="Example_Continuous.imzML"/> | 592 <composite_data value="Example_Continuous.imzML"/> |
592 <composite_data value="Example_Continuous.ibd"/> | 593 <composite_data value="Example_Continuous.ibd"/> |
594 <param name="pixel_filtering" value="two_columns"/> | 595 <param name="pixel_filtering" value="two_columns"/> |
595 <param name="two_columns_pixel" ftype="tabular" value = "inputpixels_2column.tabular"/> | 596 <param name="two_columns_pixel" ftype="tabular" value = "inputpixels_2column.tabular"/> |
596 <param name="pixel_column_x" value="1"/> | 597 <param name="pixel_column_x" value="1"/> |
597 <param name="pixel_column_y" value="3"/> | 598 <param name="pixel_column_y" value="3"/> |
598 <param name="annotation_column_xy" value="2"/> | 599 <param name="annotation_column_xy" value="2"/> |
599 <output name="filtering_qc" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/> | 600 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/> |
600 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/> | 601 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/> |
601 </test> | 602 </test> |
602 <test expect_num_outputs="2"> | 603 <test expect_num_outputs="2"> |
603 <param name="infile" value="" ftype="imzml"> | 604 <param name="infile" value="" ftype="imzml"> |
604 <composite_data value="Example_Continuous.imzML"/> | 605 <composite_data value="Example_Continuous.imzML"/> |
611 <param name="max_y_range" value="20"/> | 612 <param name="max_y_range" value="20"/> |
612 <param name="features_filtering" value="features_list"/> | 613 <param name="features_filtering" value="features_list"/> |
613 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/> | 614 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/> |
614 <param name="feature_column" value="1"/> | 615 <param name="feature_column" value="1"/> |
615 <param name="feature_header" value="0"/> | 616 <param name="feature_header" value="0"/> |
616 <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> | 617 <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> |
617 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> | 618 <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> |
618 </test> | 619 </test> |
619 <test expect_num_outputs="3"> | 620 <test expect_num_outputs="3"> |
620 <param name="infile" value="" ftype="analyze75"> | 621 <param name="infile" value="" ftype="analyze75"> |
621 <composite_data value="Analyze75.hdr"/> | 622 <composite_data value="Analyze75.hdr"/> |
627 <param name="pixel_column" value="1"/> | 628 <param name="pixel_column" value="1"/> |
628 <param name="features_filtering" value="features_list"/> | 629 <param name="features_filtering" value="features_list"/> |
629 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/> | 630 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/> |
630 <param name="feature_column" value="1"/> | 631 <param name="feature_column" value="1"/> |
631 <param name="output_matrix" value="True"/> | 632 <param name="output_matrix" value="True"/> |
632 <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> | 633 <output name="QC_overview" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> |
633 <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" /> | 634 <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" /> |
634 <output name="matrixasoutput" file="analyze_matrix.tabular"/> | 635 <output name="intensity_matrix" file="analyze_matrix.tabular"/> |
635 </test> | 636 </test> |
636 <test expect_num_outputs="2"> | 637 <test expect_num_outputs="2"> |
637 <param name="infile" value="" ftype="analyze75"> | 638 <param name="infile" value="" ftype="analyze75"> |
638 <composite_data value="Analyze75.hdr"/> | 639 <composite_data value="Analyze75.hdr"/> |
639 <composite_data value="Analyze75.img"/> | 640 <composite_data value="Analyze75.img"/> |
640 <composite_data value="Analyze75.t2m"/> | 641 <composite_data value="Analyze75.t2m"/> |
641 </param> | 642 </param> |
642 <output name="filtering_qc" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/> | 643 <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/> |
643 <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" /> | 644 <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" /> |
644 </test> | 645 </test> |
645 <test expect_num_outputs="3"> | 646 <test expect_num_outputs="3"> |
646 <param name="infile" value="preprocessed.RData" ftype="rdata"/> | 647 <param name="infile" value="preprocessed.RData" ftype="rdata"/> |
647 <conditional name="outputs"> | 648 <conditional name="outputs"> |
648 <param name="outputs_select" value="no_quality_control"/> | 649 <param name="outputs_select" value="no_quality_control"/> |
649 </conditional> | 650 </conditional> |
650 <param name="output_matrix" value="True"/> | 651 <param name="output_matrix" value="True"/> |
651 <output name="matrixasoutput" file="rdata_matrix.tabular"/> | 652 <output name="intensity_matrix" file="rdata_matrix.tabular"/> |
652 <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" /> | 653 <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" /> |
653 <output name="filtering_qc" file="rdata_notfiltered.pdf" compare="sim_size" /> | 654 <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" /> |
654 </test> | 655 </test> |
655 </tests> | 656 </tests> |
656 <help> | 657 <help> |
657 <![CDATA[ | 658 <![CDATA[ |
658 | 659 |
681 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) | 682 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) |
682 | 683 |
683 | 684 |
684 Tip: | 685 Tip: |
685 | 686 |
686 - It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If you have m/z from dataset A and you want to use them to filter dataset B, first find the corresponding (closest) features in dataset B by using the tool "Join two files on column allowing a small difference". Afterwards use the corresponding feature m/z from dataset A to filter dataset B. | 687 - It is recommended to use the filtering tool only for m/z which have been extracted from the same dataset. If the m/z values are from a different dataset, the tool "Join two files on column allowing a small difference" should be used to find corresponding m/z values, which can then be used for filtering. |
687 | 688 |
688 | 689 |
689 ]]> | 690 ]]> |
690 </help> | 691 </help> |
691 <citations> | 692 <citations> |