Galaxy |

Changeset 0:cd2f3a280463 (2025-01-22)

Next changeset 1:8a6c7302de66 (2025-08-14)

Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/bioconductor-scp commit a0a1a3de5dd24b2aabe96ec3d6f89acdcf5e462b

added:
bioconductor_scp.xml
help.xml
macros.xml
test-data/PCA.pdf
test-data/QC_boxplot.pdf
test-data/QC_boxplot_peptide.pdf
test-data/QC_boxplot_peptide_norm.pdf
test-data/QC_boxplot_protein_norm.pdf
test-data/QC_heatmap_proteins.pdf
test-data/QC_medianCV.pdf
test-data/QC_plot_SCR.pdf
test-data/QC_plot_SCR_col.pdf
test-data/UMAP.pdf
test-data/evidence_subset.txt
test-data/sampleAnnotation.txt
utils.r

diff -r 000000000000 -r cd2f3a280463 bioconductor_scp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bioconductor_scp.xml Wed Jan 22 07:44:00 2025 +0000

[

b'@@ -0,0 +1,515 @@\n+<tool id="bioconductor_scp" name="bioconductor-scp" version="@TOOL_VERSION@+galaxy0" profile="23.0">\n+ <description>single cell proteomics data analysis workflow</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ <import>help.xml</import>\n+ </macros>\n+ <xrefs>\n+ <xref type="bio.tools">scp</xref>\n+ </xrefs>\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">bioconductor-scp</requirement>\n+ <requirement type="package" version="3.54.0">bioconductor-sva</requirement>\n+ <requirement type="package" version="1.80.0">bioconductor-impute</requirement>\n+ <requirement type="package" version="1.34.0">bioconductor-scater</requirement>\n+ <requirement type="package" version="1.16.0">bioconductor-qfeatures</requirement>\n+ <requirement type="package" version="3.62.1">bioconductor-limma</requirement>\n+ <requirement type="package" version="3.5.1">r-ggplot2</requirement>\n+ </requirements>\n+ <required_files>\n+ <include path="utils.r" />\n+ </required_files>\n+ <expand macro="creator" />\n+ <command detect_errors="exit_code"><![CDATA[\n+ echo ${run_script} &&\n+ Rscript -e \'source("${__tool_directory__}/utils.r")\' -e \'source("${run_script}")\'\n+ #if $data_export.export_R_script\n+ && cat ${run_script} >> $script\n+ #end if\n+ ]]></command>\n+ <configfiles>\n+ <configfile name="run_script"><![CDATA[\n+ data_input <- read.delim("$input_data", sep="\\t")\n+ metadata <- read.delim("$input_annotations", sep="\\t")\n+ runCol <- colnames(data_input)[$runcol]\n+ fcol_aggregation_pep <- colnames(data_input)[${peptide_aggregation.column_aggregation_peptides}]\n+ fcol_aggregation_prot <- colnames(data_input)[${protein_aggregation.column_aggregation_proteins}]\n+\n+ dir.create("plots")\n+\n+ scp <- scp::readSCP(\n+ assayData = data_input,\n+ colData = metadata,\n+ runCol = runCol,\n+ removeEmptyCols = $remove_empty_columns\n+ )\n+ number_of_assays <- length(scp)\n+ scp <- QFeatures::zeroIsNA(scp, i = 1:number_of_assays)\n+\n+ 
#if $filtering_data.filter_reverse\n+ scp <- QFeatures::filterFeatures(scp,\n+ ~ Reverse != "+")\n+ #end if\n+\n+ #if $filtering_data.filter_contaminants\n+ scp <- QFeatures::filterFeatures(scp,\n+ ~ Potential.contaminant != "+")\n+ #end if\n+\n+ scp <- QFeatures::filterFeatures(scp,\n+ ~ !is.na(PIF) & PIF > ${filtering_data.PIF_threshold})\n+\n+ keepAssay <- QFeatures::dims(scp)[1, ] > ${filtering_data.minimum_features}\n+ scp <- scp[, , keepAssay]\n+ \n+ number_of_assays <- length(scp)\n+ single_cell_channels <- gsub(",", "|", "${filtering_data.single_cells}")\n+\n+ scp <- scp::computeSCR(scp,\n+ i = 1:number_of_assays,\n+ colvar = "SampleType",\n+ carrierPattern = "Carrier",\n+ samplePattern = single_cell_channels,\n+ sampleFUN = "mean",\n+ rowDataName = "MeanSCR")\n+ \n+ #if $generate_QC_plots\n+ QC_plot_SCR <- QFeatures::rbindRowData(scp, i = 1:number_of_assays) |>\n+ data.frame() |>\n+ ggplot2::ggplot(ggplot2::aes(x = MeanSCR)) +\n+ ggplot2::geom_histogram() +\n+ ggplot2::geom_vline(xintercept = c(1/$count_cell_carrier, 0.1),\n+ lty = c(2, 1)) +\n+ ggplot2::scale_x_log10()\n+ ggplot2::ggsave(filename = file.path("plots", "QC_plot_SCR.pdf"), QC_plot_SCR)\n+ \n+ QC_plot_SCR_col <- QFeatures::rbindRowData(scp, i = 1:number_of_assays) |>\n+ data.frame() |>\n+ ggplot2::ggplot(ggplot2::aes(x = MeanSCR, co'..b'" file="PCA.pdf" ftype="pdf" compare="sim_size" delta="60"/>\n+ <element name="QC_boxplot_peptide" file="QC_boxplot_peptide.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="QC_boxplot_peptide_norm" file="QC_boxplot_peptide_norm.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="QC_boxplot_protein" file="QC_boxplot.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="QC_boxplot_protein_norm" file="QC_boxplot_protein_norm.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="QC_heatmap_proteins" file="QC_heatmap_proteins.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="QC_medianCV" 
file="QC_medianCV.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="QC_plot_SCR" file="QC_plot_SCR.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="QC_plot_SCR_col" file="QC_plot_SCR_col.pdf" ftype="pdf" compare="sim_size" delta="40"/>\n+ <element name="UMAP" file="UMAP.pdf" ftype="pdf" compare="sim_size" delta="200"/>\n+ </output_collection>\n+ <output name="Processed_data">\n+ <assert_contents>\n+ <has_size size="625000" delta="20"/>\n+ <has_n_lines n="90"/>\n+ <has_text text="E9PAV3"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <test expect_num_outputs=\'3\'>\n+ <param name="input_data" value="evidence_subset.txt" />\n+ <param name="input_annotations" value="sampleAnnotation.txt"/>\n+ <param name="runcol" value="19"/>\n+ <param name="single_cells" value="Macrophage,Monocyte"/>\n+ <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>\n+ <param name="column_aggregation_peptides" value="6"/>\n+ <param name="column_aggregation_proteins" value="17"/>\n+ <param name="batch_col" value="2"/>\n+ <param name="export_tables" value="true"/>\n+ <param name="pca_coloring" value="4"/>\n+ <output_collection name="intermediate_outputs" type="list" count="6"/>\n+ <output name="Processed_data">\n+ <assert_contents>\n+ <has_size size="625000" delta="20"/>\n+ <has_n_lines n="90"/>\n+ <has_text text="E9PAV3"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <test expect_num_outputs=\'4\'>\n+ <param name="input_data" value="evidence_subset.txt" />\n+ <param name="input_annotations" value="sampleAnnotation.txt"/>\n+ <param name="runcol" value="19"/>\n+ <param name="single_cells" value="Macrophage,Monocyte"/>\n+ <param name="samples_to_keep" value="Macrophage,Monocyte,Blank"/>\n+ <param name="batch_col" value="2"/>\n+ <param name="export_RData" value="TRUE"/>\n+ <param name="export_R_script" value="TRUE"/>\n+ <param name="column_aggregation_peptides" value="6"/>\n+ <param name="column_aggregation_proteins" value="17"/>\n+ <param 
name="pca_coloring" value="4"/>\n+ <output name="Processed_data">\n+ <assert_contents>\n+ <has_size size="625000" delta="20"/>\n+ <has_n_lines n="90"/>\n+ <has_text text="E9PAV3"/>\n+ </assert_contents>\n+ </output>\n+ <output name="script">\n+ <assert_contents>\n+ <has_n_lines n="271"/>\n+ <has_text text=\'ggplot2::ggsave(filename = file.path("plots", "PCA.pdf"), pca)\'/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+ @GENERAL_HELP@\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n\\ No newline at end of file\n'

diff -r 000000000000 -r cd2f3a280463 help.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/help.xml Wed Jan 22 07:44:00 2025 +0000

@@ -0,0 +1,38 @@
+<macros>
+
+<token name="@GENERAL_HELP@">
+Scp Help section
+===================
+
+Overview
+--------
+The `scp` tool facilitates the processing of mass spectrometry-based single-cell proteomics (SCP) data. It builds on the `scp` R package developed in the laboratory of Prof. Laurent Gatto and provides functions for filtering at the peptide-spectrum match (PSM), peptide and protein levels, as well as for normalization, transformation and imputation of missing values.
+
+The source code can be found in the GitHub_ repository or on Bioconductor_; problems can be reported via the issues_ tracker.
+
+.. _GitHub: https://github.com/UCLouvain-CBIO/scp/
+.. _issues: https://github.com/UCLouvain-CBIO/scp/issues
+.. _Bioconductor: https://www.bioconductor.org/packages/release/bioc/html/scp.html
+
+Workflow
+--------
+
+The scp workflow currently supports the processing of MaxQuant results and requires two input files:
+
+- evidence.txt file (output from MaxQuant)
+- sampleAnnotation file (provided by the user). The sampleAnnotation file is a metadata file describing the annotation of individual samples (such as quantification column names, batches, sample types, etc.). Please note that the run identifier column MUST be present in both the evidence and sampleAnnotation files.
+
+The workflow starts at the PSM level. First, the data are filtered extensively to keep only the most reliable identifications: reverse sequences and potential contaminants are removed, as well as PSMs below a certain parental ion fraction (PIF) threshold or not passing a q-value threshold. Batches with very few features are also excluded.
+
+Subsequently, PSMs are aggregated to peptide level. On the peptide level, another filtering is applied based on median relative intensity or median CV. Peptide-level intensities are then normalized and log2 transformed.
+
+Such intensities are then further aggregated to the protein level, where they undergo another normalization and imputation of missing values.
+
+Because of the unavoidable batch effects present in single-cell data, scp offers two methods for batch correction: ComBat and removeBatchEffect() from the limma package.
+
+Finally, dimensionality reduction such as PCA or UMAP (on the PCA components) is provided. The PCA and UMAP plots are provided together with the (optional) quality control plots in the `Plots` collection.
+
+Final log2 transformed, normalized, imputed and batch-corrected data are provided, with the option to export also intermediate results.
+
+Due to the internal complexity of handling the data formats, we opted for a single form with pre-defined settings for the whole processing pipeline. However, we highly recommend checking the QC plots and intermediate results and adjusting the workflow settings accordingly.
+</token>
+</macros>
\ No newline at end of file

diff -r 000000000000 -r cd2f3a280463 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jan 22 07:44:00 2025 +0000

b'@@ -0,0 +1,239 @@\n+<macros>\n+ <token name="@TOOL_VERSION@">1.16.0</token>\n+ <xml name="creator">\n+ <creator>\n+ <person\n+ givenName="Kristina"\n+ familyName="Gomoryova"\n+ url="https://github.com/KristinaGomoryova"\n+ identifier="0000-0003-4407-3917" />\n+ <person\n+ givenName="Helge"\n+ familyName="Hecht"\n+ url="https://github.com/hechth"\n+ identifier="0000-0001-6744-996X" />\n+ <organization\n+ url="https://www.recetox.muni.cz/"\n+ email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"\n+ name="RECETOX MUNI" />\n+ </creator>\n+ </xml>\n+\n+ <xml name="aggregation_options">\n+ <option value="matrixStats::colMedians" selected="true">Aggregate using the median of each sample (colMedians)</option> \n+ <option value="MsCoreUtils::medianPolish">Fit an additive model (two way decomposition) using Tukey\'s median polish procedure (medianPolish)</option>\n+ <option value="BiocGenerics::colMeans">Aggregate using the mean of each sample (colMeans)</option>\n+ <option value="BiocGenerics::colSums">Aggregate using the sum of each sample (colSums)</option>\n+ <option value="MsCoreUtils::robustSummary">Calculate a robust aggregation using MASS::rlm() (robustSummary)</option>\n+ </xml>\n+\n+ <xml name="normalization_options">\n+ <option value="center.mean" selected="true">Center sample intensities by subtracting the respective column means (center.mean)</option> \n+ <option value="sum">Divide each feature\'s intensity by the sum of the feature (sum)</option>\n+ <option value="max">Divide each feature\'s intensity by the maximum of the feature (max)</option>\n+ <option value="center.median">Center sample intensities by subtracting the respective column medians (center.median)</option>\n+ <option value="div.mean">Divide by the column means (div.mean)</option>\n+ <option value="div.median">Divide by the column medians (div.median)</option>\n+ <option value="diff.median">Center all samples so that they all match the grand median by subtracting the respective columns 
medians differences to the grand median (diff.median)</option>\n+ <option value="quantiles">Quantile normalization (quantiles)</option>\n+ <option value="vsn">Variance-stabilizing normalization (vsn)</option>\n+ </xml>\n+\n+ <xml name="scp_param">\n+ <param name="input_data" type="data" format="tabular" label="Input evidence table" help= "Input file is the evidence.txt table from MaxQuant"/>\n+ <param name="input_annotations" type="data" format="tabular" label="Sample annotations table" help= "A data table specifying metadata; sample annotations."/>\n+ <param name="runcol" type="data_column" data_ref="input_data" use_header_names="true" label="Which column specifies the run identifier and batch name?" help="Column to specify both the run identifier and batch name, has to be present in both tables."/>\n+ <param name="remove_empty_columns" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Remove empty columns?" help="Whether the empty columns should be removed."/>\n+ <param name="generate_QC_plots" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Generate QC plots?" help="Whether to generate quality-control plots (distribution of the average SCR, distribution of median CV and median intensities)."/>\n+\n+ <section name="filtering_data" title="Data Filtering" expanded="true">\n+ <param name="filter_reverse" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Filter reverse sequences?" help="Whether to filter the proteins labelled as \'reverse\'."/>\n+ <param name="filter_contaminants" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Filter potential conta'..b'ier and batch name, has to be present in both tables."/>\n+ </when>\n+ <when value="removebatcheffect">\n+ <param name="preserve_col" type="data_column" data_ref="input_annotations" use_header_names="true" label="Which column is the variable to be preserved?" 
help="Which column is the variable to be preserved."/>\n+ <param name="batch_col" type="data_column" data_ref="input_annotations" use_header_names="true" label="Which column is the technical variable to be corrected?" help="Column to specify both the run identifier and batch name, has to be present in both tables."/>\n+ </when>\n+ </conditional>\n+ </section>\n+\n+ <section name="dimensionality_reduction" title="Dimensionality reduction" expanded="true">\n+ <conditional name="PCA_computation">\n+ <param type="select" label="Run principal component analysis (PCA)?" name="run_PCA" display="radio" help="Run the PCA on imputed batch-corrected protein intensities?">\n+ <option value="yes" selected="true">yes</option> \n+ <option value="no">no</option>\n+ </param> \n+ <when value="yes">\n+ <param label="Number of components" name="ncomponents_PCA" type="integer" value="5" min="2" help="Number of components in the PCA analysis."/>\n+ <param name="pca_coloring" type="data_column" data_ref="input_annotations" use_header_names="true" label="What column to color the PCA according to?" help="Based on which column from the sampleAnnotation file should be the PCA colored."/>\n+ <conditional name="UMAP_computation">\n+ <param type="select" label="Run UMAP on PCA data?" 
name="run_UMAP" display="radio" help="Run the UMAP on PCA-reduced data?">\n+ <option value="yes" selected="true">yes</option> \n+ <option value="no">no</option>\n+ </param> \n+ <when value="yes">\n+ <param label="Number of components" name="ncomponents_UMAP" type="integer" value="2" min="2" help="Number of components in the UMAP analysis."/>\n+ </when>\n+ <when value="no"/>\n+ </conditional>\n+ </when>\n+ <when value="no"/> \n+ </conditional>\n+ </section>\n+\n+ <section name="data_export" title="Export data" expanded="true">\n+ <param name="export_tables" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Export intermediate results" help="Whether only the final result table (log2 transformed, normalized, imputed, batch-corrected data) should be exported or all intermediate results."/>\n+ <param name="export_RData" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Export scp object as .rds" help="Whether to export the scp object as rds file format."/>\n+ <param name="export_R_script" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Export the R script to reproduce the analysis"\n+ help="Check this box to export the script executed in the Galaxy tool as an R file to be able to reproduce the same processing offline. Not that in this case, the file paths need to be altered and all the dependencies have to be managed manually."/>\n+ </section>\n+ </xml>\n+\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1002/cpz1.658</citation>\n+ <citation type="doi">10.1080/14789450.2021.1988571</citation>\n+ <citation type="doi">10.1021/acs.jproteome.3c00227</citation>\n+ <citation type="doi">10.1007/978-1-0716-3934-4_14</citation>\n+ <citation type="doi">10.1101/2023.12.14.571792</citation>\n+ </citations> \n+ </xml>\n+\n+</macros>\n'

diff -r 000000000000 -r cd2f3a280463 test-data/PCA.pdf

Binary file test-data/PCA.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_boxplot.pdf

Binary file test-data/QC_boxplot.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_boxplot_peptide.pdf

Binary file test-data/QC_boxplot_peptide.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_boxplot_peptide_norm.pdf

Binary file test-data/QC_boxplot_peptide_norm.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_boxplot_protein_norm.pdf

Binary file test-data/QC_boxplot_protein_norm.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_heatmap_proteins.pdf

Binary file test-data/QC_heatmap_proteins.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_medianCV.pdf

Binary file test-data/QC_medianCV.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_plot_SCR.pdf

Binary file test-data/QC_plot_SCR.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/QC_plot_SCR_col.pdf

Binary file test-data/QC_plot_SCR_col.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/UMAP.pdf

Binary file test-data/UMAP.pdf has changed

diff -r 000000000000 -r cd2f3a280463 test-data/evidence_subset.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evidence_subset.txt Wed Jan 22 07:44:00 2025 +0000

b'@@ -0,0 +1,1000 @@\n+\tuid\tSequence\tLength\tModifications\tModified.sequence\tDeamidation..NQ..Probabilities\tOxidation..M..Probabilities\tDeamidation..NQ..Score.Diffs\tOxidation..M..Score.Diffs\tAcetyl..Protein.N.term.\tDeamidation..NQ.\tOxidation..M.\tMissed.cleavages\tProteins\tLeading.proteins\tLeading.razor.protein\tType\tRaw.file\tMS.MS.m.z\tCharge\tm.z\tMass\tResolution\tUncalibrated...Calibrated.m.z..ppm.\tUncalibrated...Calibrated.m.z..Da.\tMass.error..ppm.\tMass.error..Da.\tUncalibrated.mass.error..ppm.\tUncalibrated.mass.error..Da.\tMax.intensity.m.z.0\tRetention.time\tRetention.length\tCalibrated.retention.time\tCalibrated.retention.time.start\tCalibrated.retention.time.finish\tRetention.time.calibration\tMatch.time.difference\tMatch.m.z.difference\tMatch.q.value\tMatch.score\tNumber.of.data.points\tNumber.of.scans\tNumber.of.isotopic.peaks\tPIF\tFraction.of.total.spectrum\tBase.peak.fraction\tPEP\tMS.MS.count\tMS.MS.scan.number\tScore\tDelta.score\tCombinatorics\tIntensity\tReporter.intensity.corrected.1\tReporter.intensity.corrected.2\tReporter.intensity.corrected.3\tReporter.intensity.corrected.4\tReporter.intensity.corrected.5\tReporter.intensity.corrected.6\tReporter.intensity.corrected.7\tReporter.intensity.corrected.8\tReporter.intensity.corrected.9\tReporter.intensity.corrected.10\tReporter.intensity.corrected.11\tReporter.intensity.corrected.12\tReporter.intensity.corrected.13\tReporter.intensity.corrected.14\tReporter.intensity.corrected.15\tReporter.intensity.corrected.16\tReporter.intensity.1\tReporter.intensity.2\tReporter.intensity.3\tReporter.intensity.4\tReporter.intensity.5\tReporter.intensity.6\tReporter.intensity.7\tReporter.intensity.8\tReporter.intensity.9\tReporter.intensity.10\tReporter.intensity.11\tReporter.intensity.12\tReporter.intensity.13\tReporter.intensity.14\tReporter.intensity.15\tReporter.intensity.16\tReporter.intensity.count.1\tReporter.intensity.count.2\tReporter.intensity.count.3\tReporter.intensity.count.4\tRepo
rter.intensity.count.5\tReporter.intensity.count.6\tReporter.intensity.count.7\tReporter.intensity.count.8\tReporter.intensity.count.9\tReporter.intensity.count.10\tReporter.intensity.count.11\tReporter.intensity.count.12\tReporter.intensity.count.13\tReporter.intensity.count.14\tReporter.intensity.count.15\tReporter.intensity.count.16\tReporter.PIF\tReporter.fraction\tReverse\tPotential.contaminant\tid\tProtein.group.IDs\tPeptide.ID\tMod..peptide.ID\tMS.MS.IDs\tBest.MS.MS\tDeamidation..NQ..site.IDs\tOxidation..M..site.IDs\tModified.sequence.y\tRaw.file.y\tRetention.time.y\tPEP.y\tCharge.y\tProteins.y\tRetention.length.y\tid.y\tremove\tdart_PEP\tdart_qval\trazor_protein_fdr\tpeptide\tprotein\tDeamidation..N..Probabilities\tDeamidation..N..Score.Diffs\tDeamidation..N.\tGene.names\tProtein.names\tReporter.intensity.corrected.0\tReporter.intensity.count.0\tAIF.MS.MS.IDs\tDeamidation..N..site.IDs\tinput_id\trt_minus\trt_plus\tmu\tmuij\tsigmaij\tpep_new\texp_id\tpeptide_id\tstan_peptide_id\texclude\tresidual\tparticipated\r\n+1\t_(Acetyl (Protein N-term))ATNFLAHEK_ 2 0.00052636 190321S_LCA10_X_FP97AG\tATNFLAHEK\t9\tAcetyl (Protein N-term)\t_(Acetyl (Protein N-term))ATNFLAHEK_\t\t\t\t\t1\t0\t0\t0\tsp|P29692|EF1D_HUMAN\tsp|P29692|EF1D_HUMAN\tP29692\tMULTI-MSMS\t190321S_LCA10_X_FP97AG\t651.3558227\t2\t536.774731\t1071.53491\t43991.64098\t-0.26173\t-0.00014049\t213460\t114.58\t213460\t114.58\t651.3558818\t65.781\t0.37344\t65.781\t65.617\t65.991\t-1.42E-14\tNA\tNA\tNA\tNA\t14\t6\t3\t0.59752515\t0.001180528\t0.012092621\t0.00052636\t1\t9151\t41.029\t11.456\t1\t12315000\t61251\t501.71\t3731.3\t1643.3\t871.84\t981.87\t1200.1\t939.06\t1457.5\t1329.8\t981.83\tNA\tNA\tNA\tNA\tNA\t61251\t501.71\t3731.3\t1643.3\t871.84\t981.87\t1200.1\t939.06\t1457.5\t1329.8\t981.83\tNA\tNA\tNA\tNA\tNA\t1\t1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t\t\t77968\t16124\t5851\t5943\t78377\t78377\t\t\t_(Acetyl (Protein 
N-term))ATNFLAHEK_\t190321S_LCA10_X_FP97AG\t65.781\t0.00052636\t2\tP29692\t0.37344\t90680\tFALSE\t7.4914146847758e-06\t1\t1.47235585669173e-15\t_(Acetyl (Protein N-term))ATNFLAHEK_2\tP29692\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\r\n+2\t_(Acetyl (Protein N-term))ATNFLAHEK_ 2 0.00058789 190222S_LCA9_X_FP94BM\tATNFLAHEK\t9\tAcetyl (P'..b'\t190222S_LCA9_X_FP94BM\t704.39104\t2\t475.227279\t948.440005\t42294.63789\t1.9078\t0.00090665\t482210\t229.16\t482220\t229.16\t704.3897351\t51.946\t0.2216\t51.946\t51.794\t52.016\t7.1054e-15\tNA\tNA\tNA\tNA\t20\t7\t4\t0.951874817\t0.000809604\t0.002002092\t1.3135\t1\t9642\t26.384\t4.8773\t1\t4495500\t59513\t412.94\t4074.8\t0\t318.32\t765.44\t0\t1156.3\t412.86\t724.63\t582.66\tNA\tNA\tNA\tNA\tNA\t59513\t412.94\t4074.8\t0\t318.32\t765.44\t0\t1156.3\t412.86\t724.63\t582.66\tNA\tNA\tNA\tNA\tNA\t1\t1\t1\t0\t1\t1\t0\t1\t1\t1\t1\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t\t\t587888\t18546\t63605\t64510\t592372\t592372\t\t\t_TGDEKDVSV_\t190222S_LCA9_X_FP94BM\t51.946\t1.3135\t2\tQ16543\t0.2216\t600600\tFALSE\t1\t0.122672369\t0\t_TGDEKDVSV_2\tQ16543\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\r\n+996\t_TGDEKDVSV_ 2 1.4057 
190321S_LCA10_X_FP97AG\tTGDEKDVSV\t9\tUnmodified\t_TGDEKDVSV_\t\t\t\t\t0\t0\t0\t1\tsp|Q16543|CDC37_HUMAN\tsp|Q16543|CDC37_HUMAN\tQ16543\tMULTI-MSMS\t190321S_LCA10_X_FP97AG\t704.3917114\t2\t475.227279\t948.440005\t41629.04377\t-0.62025\t-0.00029476\t482220\t229.16\t482220\t229.16\t704.391207\t53.41\t0.20345\t53.41\t53.329\t53.533\t0\tNA\tNA\tNA\tNA\t10\t4\t3\t0.977333471\t0.001403587\t0.005665649\t1.4057\t1\t6776\t22.237\t1.9076\t1\t4122700\t72721\t377.8\t5224.1\t1576.9\t941.47\t707.68\t508.2\t1252\t1232.4\t359.72\t402.2\tNA\tNA\tNA\tNA\tNA\t72721\t377.8\t5224.1\t1576.9\t941.47\t707.68\t508.2\t1252\t1232.4\t359.72\t402.2\tNA\tNA\tNA\tNA\tNA\t1\t1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t\t\t587903\t18546\t63605\t64510\t592387\t592387\t\t\t_TGDEKDVSV_\t190321S_LCA10_X_FP97AG\t53.41\t1.4057\t2\tQ16543\t0.20345\t600615\tFALSE\t1\t0.124288553\t0\t_TGDEKDVSV_2\tQ16543\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\r\n+997\t_TGEGFYK_ 2 0.59986 190321S_LCA10_X_FP97AG\tTGEGFYK\t7\tUnmodified\t_TGEGFYK_\t\t\t\t\t0\t0\t0\t0\tsp|Q16836|HCDH_HUMAN\tsp|Q16836|HCDH_HUMAN\tQ16836\tMSMS\t190321S_LCA10_X_FP97AG\t630.3558838\t2\t401.192511\t800.370469\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t57.123\t1\t57.123\t56.623\t57.623\t0\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t0.59986\t1\t7473\t34.824\t1.9836\t1\tNA\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNA\tNA\tNA\tNA\tNA\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNA\tNA\tNA\tNA\tNA\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t\t\t588083\t18612\t63626\t64531\t592578\t592578\t\t\t_TGEGFYK_\t190321S_LCA10_X_FP97AG\t57.123\t0.59986\t2\tQ16836\t1\t600795\tFALSE\t0.01604651\t0.000944686\t1.14262997016805e-09\t_TGEGFYK_2\tQ16836\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\r\n+998\t_TGQPHPR_ 2 0.4556 
190222S_LCA9_X_FP94BM\tTGQPHPR\t7\tUnmodified\t_TGQPHPR_\t\t\t\t\t0\t0\t0\t0\tsp|Q86UP3|ZFHX4_HUMAN\tsp|Q86UP3|ZFHX4_HUMAN\tQ86UP3\tMULTI-MSMS\t190222S_LCA9_X_FP94BM\t511.2922851\t2\t396.709194\t791.403835\t50807.74757\t1.8603\t0.00073798\t288830\t114.58\t288840\t114.58\t511.2919213\t59.273\t0.5487\t59.273\t58.959\t59.508\t7.1054e-15\tNA\tNA\tNA\tNA\t32\t19\t3\t0.694139939\t0.001174277\t0.003099663\t0.4556\t1\t10845\t15.221\t0\t1\t1400800\t14153\t4255.3\t847.93\t772.64\t667.61\t601.1\t277.76\t0\t360.03\t566.31\t790.13\tNA\tNA\tNA\tNA\tNA\t14153\t4255.3\t847.93\t772.64\t667.61\t601.1\t277.76\t0\t360.03\t566.31\t790.13\tNA\tNA\tNA\tNA\tNA\t1\t1\t1\t1\t1\t1\t1\t0\t1\t1\t1\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t\t\t589283\t20621\t63786\t64692\t593779\t593779\t\t\t_TGQPHPR_\t190222S_LCA9_X_FP94BM\t59.273\t0.4556\t2\tQ86UP3\t0.5487\t601995\tTRUE\t0.4556\t0.048159597\t5.40173973875676e-08\t_TGQPHPR_2\tQ86UP3\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\r\n+999\t_TGVQTFR_ 2 0.45529 
190321S_LCA10_X_FP97AG\tTGVQTFR\t7\tUnmodified\t_TGVQTFR_\t\t\t\t\t0\t0\t0\t0\tsp|P53621|COPA_HUMAN\tsp|P53621|COPA_HUMAN\tP53621\tMULTI-MSMS\t190321S_LCA10_X_FP97AG\t519.3008301\t2\t404.719227\t807.423902\t49016.22882\t0.0054201\t2.1936e-06\t283110\t114.58\t283110\t114.58\t519.3007128\t45.642\t0.22364\t45.642\t45.51\t45.734\t7.1054e-15\tNA\tNA\tNA\tNA\t29\t12\t3\t0.779854455\t0.003552763\t0.042351467\t0.45529\t1\t5276\t15.439\t4.0708\t1\t2813500\t23045\t12139\t2806.6\t2449.7\t1329.4\t1606.3\t1545.3\t1010.4\t1861.2\t1369.3\t1324.6\tNA\tNA\tNA\tNA\tNA\t23045\t12139\t2806.6\t2449.7\t1329.4\t1606.3\t1545.3\t1010.4\t1861.2\t1369.3\t1324.6\tNA\tNA\tNA\tNA\tNA\t1\t1\t1\t1\t1\t1\t1\t1\t1\t1\t1\tNA\tNA\tNA\tNA\tNA\tNA\tNA\t\t\t590278\t16919\t63892\t64799\t594776\t594776\t\t\t_TGVQTFR_\t190321S_LCA10_X_FP97AG\t45.642\t0.45529\t2\tP53621\t0.22364\t602990\tTRUE\t0.45529\t0.047855798\t1.45138530823393e-16\t_TGVQTFR_2\tP53621\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA\r\n'

diff -r 000000000000 -r cd2f3a280463 test-data/sampleAnnotation.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sampleAnnotation.txt Wed Jan 22 07:44:00 2025 +0000

@@ -0,0 +1,65 @@
+ runCol quantCols SampleType lcbatch sortday digest
+1 190222S_LCA9_X_FP94BM Reporter.intensity.1 Carrier LCA9 s8 N
+2 190222S_LCA9_X_FP94BM Reporter.intensity.2 Reference LCA9 s8 N
+3 190222S_LCA9_X_FP94BM Reporter.intensity.3 Unused LCA9 s8 N
+4 190222S_LCA9_X_FP94BM Reporter.intensity.4 Monocyte LCA9 s8 N
+5 190222S_LCA9_X_FP94BM Reporter.intensity.5 Blank LCA9 s8 N
+6 190222S_LCA9_X_FP94BM Reporter.intensity.6 Monocyte LCA9 s8 N
+7 190222S_LCA9_X_FP94BM Reporter.intensity.7 Macrophage LCA9 s8 N
+8 190222S_LCA9_X_FP94BM Reporter.intensity.8 Macrophage LCA9 s8 N
+9 190222S_LCA9_X_FP94BM Reporter.intensity.9 Macrophage LCA9 s8 N
+10 190222S_LCA9_X_FP94BM Reporter.intensity.10 Macrophage LCA9 s8 N
+11 190222S_LCA9_X_FP94BM Reporter.intensity.11 Macrophage LCA9 s8 N
+12 190222S_LCA9_X_FP94BM Reporter.intensity.12 Unused LCA9 s8 N
+13 190222S_LCA9_X_FP94BM Reporter.intensity.13 Unused LCA9 s8 N
+14 190222S_LCA9_X_FP94BM Reporter.intensity.14 Unused LCA9 s8 N
+15 190222S_LCA9_X_FP94BM Reporter.intensity.15 Unused LCA9 s8 N
+16 190222S_LCA9_X_FP94BM Reporter.intensity.16 Unused LCA9 s8 N
+17 190321S_LCA10_X_FP97AG Reporter.intensity.1 Carrier LCA10 s8 Q
+18 190321S_LCA10_X_FP97AG Reporter.intensity.2 Reference LCA10 s8 Q
+19 190321S_LCA10_X_FP97AG Reporter.intensity.3 Unused LCA10 s8 Q
+20 190321S_LCA10_X_FP97AG Reporter.intensity.4 Macrophage LCA10 s8 Q
+21 190321S_LCA10_X_FP97AG Reporter.intensity.5 Monocyte LCA10 s8 Q
+22 190321S_LCA10_X_FP97AG Reporter.intensity.6 Macrophage LCA10 s8 Q
+23 190321S_LCA10_X_FP97AG Reporter.intensity.7 Macrophage LCA10 s8 Q
+24 190321S_LCA10_X_FP97AG Reporter.intensity.8 Macrophage LCA10 s8 Q
+25 190321S_LCA10_X_FP97AG Reporter.intensity.9 Macrophage LCA10 s8 Q
+26 190321S_LCA10_X_FP97AG Reporter.intensity.10 Macrophage LCA10 s8 Q
+27 190321S_LCA10_X_FP97AG Reporter.intensity.11 Macrophage LCA10 s8 Q
+28 190321S_LCA10_X_FP97AG Reporter.intensity.12 Unused LCA10 s8 Q
+29 190321S_LCA10_X_FP97AG Reporter.intensity.13 Unused LCA10 s8 Q
+30 190321S_LCA10_X_FP97AG Reporter.intensity.14 Unused LCA10 s8 Q
+31 190321S_LCA10_X_FP97AG Reporter.intensity.15 Unused LCA10 s8 Q
+32 190321S_LCA10_X_FP97AG Reporter.intensity.16 Unused LCA10 s8 Q
+33 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.1 Carrier LCB3 s9 R
+34 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.2 Reference LCB3 s9 R
+35 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.3 Unused LCB3 s9 R
+36 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.4 Unused LCB3 s9 R
+37 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.5 Macrophage LCB3 s9 R
+38 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.6 Macrophage LCB3 s9 R
+39 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.7 Blank LCB3 s9 R
+40 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.8 Monocyte LCB3 s9 R
+41 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.9 Macrophage LCB3 s9 R
+42 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.10 Monocyte LCB3 s9 R
+43 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.11 Blank LCB3 s9 R
+44 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.12 Macrophage LCB3 s9 R
+45 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.13 Macrophage LCB3 s9 R
+46 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.14 Macrophage LCB3 s9 R
+47 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.15 Macrophage LCB3 s9 R
+48 190914S_LCB3_X_16plex_Set_21 Reporter.intensity.16 Macrophage LCB3 s9 R
+49 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.1 Blank LCA10 s8 NA
+50 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.2 Blank LCA10 s8 NA
+51 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.3 Blank LCA10 s8 NA
+52 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.4 Blank LCA10 s8 NA
+53 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.5 Blank LCA10 s8 NA
+54 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.6 Blank LCA10 s8 NA
+55 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.7 Blank LCA10 s8 NA
+56 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.8 Blank LCA10 s8 NA
+57 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.9 Blank LCA10 s8 NA
+58 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.10 Blank LCA10 s8 NA
+59 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.11 Blank LCA10 s8 NA
+60 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.12 Blank LCA10 s8 NA
+61 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.13 Blank LCA10 s8 NA
+62 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.14 Blank LCA10 s8 NA
+63 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.15 Blank LCA10 s8 NA
+64 190321S_LCA10_X_FP97_blank_01 Reporter.intensity.16 Blank LCA10 s8 NA

diff -r 000000000000 -r cd2f3a280463 utils.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.r Wed Jan 22 07:44:00 2025 +0000

[

@@ -0,0 +1,66 @@
+# Export intermediate results
+# Export a single assay, together with its row metadata, as a tab-separated
+# text file.
+#
+# Args:
+#   qf: object holding the assays. `qf[[assay_name]]` must be accepted by
+#     SummarizedExperiment::assay() and SummarizedExperiment::rowData()
+#     (presumably a QFeatures object -- TODO confirm against the caller).
+#   assay_name: name of the assay to export; also used as the prefix of the
+#     output file name.
+#
+# Writes "outputs/<assay_name>_export.txt" relative to the working directory.
+# The first column, `RowNames`, holds the row names of the assay matrix; it is
+# followed by the row metadata columns and then the assay columns.
+export_assay_with_metadata <- function(qf, assay_name) {
+    assay_obj <- qf[[assay_name]]
+    assay_data <- SummarizedExperiment::assay(assay_obj)
+    row_metadata <- as.data.frame(SummarizedExperiment::rowData(assay_obj))
+    # The row names go into an explicit column because the table is written
+    # with row.names = FALSE.
+    export_data <- cbind(
+        RowNames = rownames(assay_data),
+        row_metadata,
+        as.data.frame(assay_data)
+    )
+    # Keep the function usable on its own: create the output directory when
+    # the caller has not done so already.
+    if (!dir.exists("outputs")) {
+        dir.create("outputs")
+    }
+    output_file <- file.path("outputs", paste0(assay_name, "_export.txt"))
+    write.table(
+        export_data, output_file,
+        row.names = FALSE, sep = "\t", quote = FALSE
+    )
+}
+
+# Export a set of assays as tab-separated text files in "outputs/".
+#
+# Args:
+#   qf: object holding the assays; passed on unchanged to
+#     export_assay_with_metadata().
+#   assay_names: character vector with the names of the assays to export.
+#     Defaults to the six peptide- and protein-level assay names listed in
+#     the signature; every name must exist in `qf`.
+export_all_assays <- function(
+    qf,
+    assay_names = c(
+        "peptides", "peptides_norm", "peptides_log",
+        "proteins", "proteins_norm", "proteins_imptd"
+    )) {
+    # Only create the directory when it is missing, so that a pre-existing
+    # "outputs" directory does not trigger a warning.
+    if (!dir.exists("outputs")) {
+        dir.create("outputs")
+    }
+    for (assay_name in assay_names) {
+        export_assay_with_metadata(qf, assay_name)
+    }
+}
+
+# Build per-run QC boxplots of the values of one assay, filled by sample type.
+#
+# Args:
+#   scp: object holding the assays and the sample annotations. `scp[[i]]`
+#     must be accepted by SummarizedExperiment::assay() and `scp` by
+#     SummarizedExperiment::colData(). The plot aesthetics refer to the
+#     `runCol` and `SampleType` columns by name.
+#   i: name or index of the assay to plot.
+#   is_log2: when TRUE, the values are log2-transformed here before plotting.
+#     NOTE(review): despite the name, TRUE means "apply log2", not "the data
+#     are already on the log2 scale".
+#   name: plot title.
+#
+# Returns the ggplot object; nothing is saved by this function.
+create_boxplots <- function(scp, i, is_log2, name) {
+    # Assay values as a data frame with the row names in a FeatureID column.
+    assay_data <- scp[[i]] |>
+        SummarizedExperiment::assay() |>
+        as.data.frame() |>
+        tibble::rownames_to_column("FeatureID")
+    # Sample annotations, keyed by the same identifiers as the assay columns.
+    col_data <- scp |>
+        SummarizedExperiment::colData() |>
+        as.data.frame() |>
+        tibble::rownames_to_column("SampleID")
+    # One row per feature/sample pair, annotated with the sample metadata.
+    long_data <- assay_data |>
+        tidyr::pivot_longer(
+            cols = -FeatureID,
+            names_to = "SampleID",
+            values_to = "Value"
+        ) |>
+        dplyr::left_join(col_data, by = "SampleID")
+    # The comparison is kept as-is so that the values accepted from existing
+    # callers do not change.
+    if (is_log2 == TRUE) {
+        long_data$Value <- log2(long_data$Value)
+    }
+    long_data |>
+        # Drop missing values (NA and NaN) explicitly instead of comparing the
+        # numeric column with the string "NaN".
+        dplyr::filter(!is.na(Value)) |>
+        ggplot2::ggplot(ggplot2::aes(x = runCol, y = Value, fill = SampleType)) +
+        ggplot2::geom_boxplot() +
+        ggplot2::theme_bw() +
+        ggplot2::labs(
+            title = name,
+            x = "Run",
+            y = "Log2 intensity"
+        ) +
+        ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1))
+}
+
+# Draw a presence/absence heatmap of one assay.
+#
+# Args:
+#   scp: object holding the assays; `scp[[i]]` must be accepted by
+#     SummarizedExperiment::assay().
+#   i: name or index of the assay to plot.
+#
+# Values greater than zero are encoded as 1; zeros, negative values and
+# missing values as 0. The binary matrix is drawn with heatmap() using the
+# colours white and black, without scaling and without row labels.
+plot_heatmap <- function(scp, i) {
+    heatmap_mat <- as.matrix(SummarizedExperiment::assay(scp[[i]]))
+    # Missing values are treated like zeros.
+    heatmap_mat[is.na(heatmap_mat)] <- 0
+    heatmap_bin <- ifelse(heatmap_mat > 0, 1, 0)
+    # Shorten the column labels. `fixed = TRUE` makes the dots literal; read as
+    # a regular expression, each "." would match any character.
+    colnames(heatmap_bin) <- gsub(
+        "Reporter.intensity.", "", colnames(heatmap_bin),
+        fixed = TRUE
+    )
+    heatmap(
+        heatmap_bin,
+        scale = "none", col = c("white", "black"),
+        labRow = FALSE, margins = c(10, 5), cexCol = 0.5
+    )
+}