Mercurial > repos > goeckslab > cleaning_spatialge

diff preprocessing.xml @ 0:c84663d92248 draft default tip
planemo upload for repository https://github.com/goeckslab/tools-st/tree/main/tools/spatialge commit 482b2e0e6ca7aaa789ba07b8cd689da9a01532ef
author: goeckslab
date: Wed, 13 Aug 2025 19:32:05 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessing.xml	Wed Aug 13 19:32:05 2025 +0000
@@ -0,0 +1,665 @@
+<tool id="cleaning_spatialGE" name="spatialGE Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01">
+    <!-- NOTE(review): the @TOOL_VERSION@ and @VERSION_SUFFIX@ tokens in the version attribute are presumably defined in macros.xml, which is not part of this file; confirm there. -->
+    <description>Initial data preparation for downstream spatial transcriptomic analyses</description>
+    <!-- Import the macro definitions that the expand elements in this file refer to. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <!-- Expands the spatialge_requirements macro from the imported macro file. -->
+    <expand macro="spatialge_requirements"/>
+
+    <command detect_errors="aggressive"><![CDATA[
+
+    ##---------------------------------------------------------
+    ## COMMAND LAYOUT
+    ##   1. stage the inputs of the selected platform (symlinks)
+    ##   2. start the matching R driver with its platform arguments
+    ##   3. append the plot / filter / transformation options once;
+    ##      they are identical for every platform
+    ##
+    ## Every parameter is addressed by its full path through the
+    ## enclosing conditionals (platform_type, cosmx_file_selection,
+    ## raw_file_selection) so lookups never depend on unqualified names.
+    ##---------------------------------------------------------
+
+    #set $input_platform = str($platform_type.platform)
+
+    ## counts_dir is created for every platform; only the Visium and the
+    ## multiple-sample branches place files in it
+    mkdir counts_dir &&
+
+    ##---------------------------------------------------------
+    ## VISIUM INPUT HANDLING
+    ##---------------------------------------------------------
+
+    #if $input_platform == 'visium':
+
+        ## symlink the (required) metadata file under its dataset name
+        ln -s '${platform_type.visium_metadata}' '${platform_type.visium_metadata.name}' &&
+
+        ## one directory per Visium sample: counts_dir/SAMPLE holds the .h5 file,
+        ## counts_dir/SAMPLE/spatial holds the png / csv / json files
+        #for $v in $platform_type.visium_samples:
+
+            #set $current_sample_dir = 'counts_dir/' + str($v.visium_sample_name)
+
+            mkdir -p '${current_sample_dir}/spatial' &&
+
+            ## route each file of the sample collection by its name suffix;
+            ## files with any other suffix are not staged
+            #for $f in $v.visium_collection:
+                #if $f.name.endswith('h5')
+                    ln -s '$f' '${current_sample_dir}/${f.name}' &&
+                #elif $f.name.endswith('png') or $f.name.endswith('csv') or $f.name.endswith('json')
+                    ln -s '$f' '${current_sample_dir}/spatial/${f.name}' &&
+                #end if
+            #end for
+
+        #end for
+
+        Rscript '$__tool_directory__/spatialGE_multiple_input.R'
+
+        ## counts keeps the .h5 plus spatial subdir structure built above
+        --counts counts_dir
+
+        --meta '${platform_type.visium_metadata.name}'
+
+    ##---------------------------------------------------------
+    ## SINGLE COSMX INPUT HANDLING
+    ##---------------------------------------------------------
+
+    #elif $input_platform == 'cosmx' and str($platform_type.cosmx_file_selection.cosmx_file_quantity) == 'single_cosmx_input':
+
+        ## symlink counts and coords files under their dataset names
+        ln -s '${platform_type.cosmx_file_selection.single_cosmx_counts}' '${platform_type.cosmx_file_selection.single_cosmx_counts.name}' &&
+        ln -s '${platform_type.cosmx_file_selection.single_cosmx_spotcoords}' '${platform_type.cosmx_file_selection.single_cosmx_spotcoords.name}' &&
+
+        Rscript '$__tool_directory__/spatialGE_single_input.R'
+
+        --counts '${platform_type.cosmx_file_selection.single_cosmx_counts.name}'
+
+        --spots '${platform_type.cosmx_file_selection.single_cosmx_spotcoords.name}'
+
+        --names '${platform_type.cosmx_sample_names}'
+
+    ##---------------------------------------------------------
+    ## MULTIPLE COSMX INPUT HANDLING
+    ##---------------------------------------------------------
+
+    #elif $input_platform == 'cosmx' and str($platform_type.cosmx_file_selection.cosmx_file_quantity) == 'multiple_cosmx_input':
+
+        mkdir coords_dir &&
+
+        ## symlink every count file into counts_dir and every coord file into coords_dir
+        #for $cf in $platform_type.cosmx_file_selection.multiple_cosmx_counts:
+            ln -s '$cf' counts_dir/'${cf.name}' &&
+        #end for
+        #for $sc in $platform_type.cosmx_file_selection.multiple_cosmx_spotcoords:
+            ln -s '$sc' coords_dir/'${sc.name}' &&
+        #end for
+
+        Rscript '$__tool_directory__/spatialGE_multiple_input.R'
+
+        #if $platform_type.cosmx_file_selection.multiple_cosmx_counts
+            --counts counts_dir/
+        #end if
+
+        #if $platform_type.cosmx_file_selection.multiple_cosmx_spotcoords
+            --spots coords_dir/
+        #end if
+
+        #if $platform_type.cosmx_sample_names
+            --names '${platform_type.cosmx_sample_names}'
+        #end if
+
+    ##---------------------------------------------------------
+    ## SINGLE RAW INPUT HANDLING
+    ##---------------------------------------------------------
+
+    #elif $input_platform == 'raw_data' and str($platform_type.raw_file_selection.raw_file_quantity) == 'single_raw_input':
+
+        ## symlink count, coord, and metadata files under their dataset names
+        ln -s '${platform_type.raw_file_selection.single_raw_counts}' '${platform_type.raw_file_selection.single_raw_counts.name}' &&
+        ln -s '${platform_type.raw_file_selection.single_raw_spotcoords}' '${platform_type.raw_file_selection.single_raw_spotcoords.name}' &&
+        ln -s '${platform_type.raw_metadata}' '${platform_type.raw_metadata.name}' &&
+
+        Rscript '$__tool_directory__/spatialGE_single_input.R'
+
+        --counts '${platform_type.raw_file_selection.single_raw_counts.name}'
+
+        --spots '${platform_type.raw_file_selection.single_raw_spotcoords.name}'
+
+        --meta '${platform_type.raw_metadata.name}'
+
+    ##---------------------------------------------------------
+    ## MULTIPLE RAW INPUT HANDLING
+    ##---------------------------------------------------------
+
+    #elif $input_platform == 'raw_data' and str($platform_type.raw_file_selection.raw_file_quantity) == 'multiple_raw_input':
+
+        mkdir coords_dir &&
+
+        ## symlink every count file into counts_dir and every coord file into coords_dir
+        #for $cf in $platform_type.raw_file_selection.multiple_raw_counts:
+            ln -s '$cf' counts_dir/'${cf.name}' &&
+        #end for
+        #for $sc in $platform_type.raw_file_selection.multiple_raw_spotcoords:
+            ln -s '$sc' coords_dir/'${sc.name}' &&
+        #end for
+
+        ln -s '${platform_type.raw_metadata}' '${platform_type.raw_metadata.name}' &&
+
+        Rscript '$__tool_directory__/spatialGE_multiple_input.R'
+
+        #if $platform_type.raw_file_selection.multiple_raw_counts
+            --counts counts_dir/
+        #end if
+
+        #if $platform_type.raw_file_selection.multiple_raw_spotcoords
+            --spots coords_dir/
+        #end if
+
+        --meta '${platform_type.raw_metadata.name}'
+
+    #end if
+
+    ##---------------------------------------------------------
+    ## OPTIONS SHARED BY EVERY PLATFORM
+    ## appended to whichever Rscript call was started above
+    ##---------------------------------------------------------
+
+    ## optional distribution plot of the unfiltered data
+    #if str($distribution_plots.plot) == 'raw_plot':
+        --distplot
+        --plotmeta '${distribution_plots.plotmeta}'
+        #if $distribution_plots.samples
+            --samples '${distribution_plots.samples}'
+        #end if
+    #end if
+
+    ## optional spot / gene filtering; each threshold is passed only when set
+    #if str($spot_filtering.filter) == 'filter':
+        --filter
+        #if $spot_filtering.spot_min_reads
+            --sminreads '${spot_filtering.spot_min_reads}'
+        #end if
+        #if $spot_filtering.spot_max_reads
+            --smaxreads '${spot_filtering.spot_max_reads}'
+        #end if
+        #if $spot_filtering.spot_min_genes
+            --smingenes '${spot_filtering.spot_min_genes}'
+        #end if
+        #if $spot_filtering.spot_max_genes
+            --smaxgenes '${spot_filtering.spot_max_genes}'
+        #end if
+        #if $spot_filtering.gene_min_reads
+            --gminreads '${spot_filtering.gene_min_reads}'
+        #end if
+        #if $spot_filtering.gene_max_reads
+            --gmaxreads '${spot_filtering.gene_max_reads}'
+        #end if
+        #if $spot_filtering.gene_min_spots
+            --gminspots '${spot_filtering.gene_min_spots}'
+        #end if
+        #if $spot_filtering.gene_max_spots
+            --gmaxspots '${spot_filtering.gene_max_spots}'
+        #end if
+    #end if
+
+    ## optional distribution plot of the filtered data
+    ## NOTE(review): --plotmeta and --samples are also emitted by the raw plot
+    ## section above, so the R script receives them twice when both plots are
+    ## requested; confirm how the script resolves the repeated flags
+    #if str($filtered_distribution_plots.plot) == 'filtered_plot':
+        --filterplot
+        --plotmeta '${filtered_distribution_plots.plotmeta}'
+        #if $filtered_distribution_plots.samples
+            --samples '${filtered_distribution_plots.samples}'
+        #end if
+    #end if
+
+    ## data transformation is always passed
+    --type '$transformation'
+
+    ]]></command>
+    <inputs>
+        <!-- Input source: selects which files are requested and which branch of the command template runs -->
+        <conditional name="platform_type">
+            <param name="platform" type="select" label="Select Input Type">
+                <option value="visium">Visium</option>
+                <option value="cosmx">CosMX-SMI</option>
+                <option value="raw_data">Raw Counts and Coordinates</option>
+            </param>
+            <when value="visium">
+                <!-- One repeat block per Visium sample -->
+                <repeat name="visium_samples" title="Visium Sample" min="1">
+                    <param name="visium_sample_name" type="text" optional="false" label="Sample Name (sample ID/name in metadata file)">
+                        <!-- The name is used as a directory name under counts_dir, so a blank value must be rejected -->
+                        <validator type="empty_field" message="A sample name is required"/>
+                    </param>
+                    <param name="visium_collection" type="data_collection" multiple="true" label="Visium Files (h5, png, json, csv)" />
+                </repeat>
+                <param name="visium_metadata" type="data" format="csv,tsv" label="Metadata" />
+            </when>
+            <when value="cosmx">
+                <!-- Single sample: two datasets; multiple samples: two collections -->
+                <conditional name="cosmx_file_selection">
+                    <param name="cosmx_file_quantity" type="select" label="Choose Input Quantity">
+                        <option value="single_cosmx_input">Single Sample Input</option>
+                        <option value="multiple_cosmx_input">Multiple Sample Input</option>
+                    </param>
+                    <when value="single_cosmx_input">
+                        <param name="single_cosmx_counts" type="data" format="csv,tsv" label="Expression Matrix" />
+                        <param name="single_cosmx_spotcoords" type="data" format="csv,tsv" label="Metadata File" />
+                    </when>
+                    <when value="multiple_cosmx_input">
+                        <param name="multiple_cosmx_counts" type="data_collection" format="csv,tsv" label="Collection of Expression Matrices (one file per sample)" />
+                        <param name="multiple_cosmx_spotcoords" type="data_collection" format="csv,tsv" label="Collection of Metadata Files (one file per sample)" />
+                    </when>
+                </conditional>
+                <param name="cosmx_sample_names" type="text" optional="false" label="Sample Name(s)">
+                    <!-- Declared required, so reject a blank value before the job is submitted -->
+                    <validator type="empty_field" message="At least one sample name is required"/>
+                </param>
+            </when>
+            <when value="raw_data">
+                <!-- Single sample: two datasets; multiple samples: two collections -->
+                <conditional name="raw_file_selection">
+                    <param name="raw_file_quantity" type="select" label="Choose Input Quantity">
+                        <option value="single_raw_input">Single Sample Input</option>
+                        <option value="multiple_raw_input">Multiple Sample Input</option>
+                    </param>
+                    <when value="single_raw_input">
+                        <param name="single_raw_counts" type="data" format="csv,tsv" label="Counts" />
+                        <param name="single_raw_spotcoords" type="data" format="csv,tsv" label="Spot Coordinates" />
+                    </when>
+                    <when value="multiple_raw_input">
+                        <param name="multiple_raw_counts" type="data_collection" format="csv,tsv" label="Collection of Counts Files (one per sample)" />
+                        <param name="multiple_raw_spotcoords" type="data_collection" format="csv,tsv" label="Collection of spot coord files (one per sample)" />
+                    </when>
+                </conditional>
+                <param name="raw_metadata" type="data" format="csv,tsv" label="Metadata" />
+            </when>
+        </conditional>
+        <!-- Optional distribution plot of the data before filtering -->
+        <conditional name="distribution_plots">
+            <param name="plot" type="select" label="Optional: Generate Distribution Plot of Raw Data">
+                <option value="no_plot" selected="true">Do not generate distribution plot</option>
+                <option value="raw_plot">Generate distribution plot</option>
+            </param>
+            <when value="no_plot"/>
+            <when value="raw_plot">
+                <param name="plotmeta" type="select" label="Plot counts per cell or genes per cell">
+                    <option value="total_counts">Total counts</option>
+                    <option value="total_genes">Total genes</option>
+                </param>
+                <param name="samples" type="text" optional="true" label="Optional subset of samples for distribution plotting (defaults to all)" />
+            </when>
+        </conditional>
+        <!-- Optional quality control; every threshold is optional and only passed on when set -->
+        <conditional name="spot_filtering">
+            <param name="filter" type="select" label="Optional: Perform Quality Control with Spot Filtering">
+                <option value="no_filter" selected="true">Do not perform filtering</option>
+                <option value="filter">Filter spots/cells</option>
+            </param>
+            <when value="no_filter"/>
+            <when value="filter">
+                <param name="spot_min_reads" type="integer" min="0" optional="true" label="Minimum number of total reads for a spot to be retained" />
+                <param name="spot_max_reads" type="integer" min="0" optional="true" label="Maximum number of total reads for a spot to be retained" />
+                <param name="spot_min_genes" type="integer" min="0" optional="true" label="Minimum number of genes expressed in a spot" />
+                <param name="spot_max_genes" type="integer" min="0" optional="true" label="Maximum number of genes expressed in a spot" />
+                <param name="gene_min_reads" type="integer" min="0" optional="true" label="Minimum number of total reads for a gene to be retained" />
+                <param name="gene_max_reads" type="integer" min="0" optional="true" label="Maximum number of total reads for a gene to be retained" />
+                <param name="gene_min_spots" type="integer" min="0" optional="true" label="Minimum number of spots present in a gene" />
+                <param name="gene_max_spots" type="integer" min="0" optional="true" label="Maximum number of spots present in a gene" />
+            </when>
+        </conditional>
+        <!-- Optional distribution plot of the data after filtering -->
+        <conditional name="filtered_distribution_plots">
+            <param name="plot" type="select" label="Optional: Generate Distribution Plot of Filtered Data">
+                <option value="no_plot" selected="true">Do not generate distribution plot</option>
+                <option value="filtered_plot">Generate distribution plot</option>
+            </param>
+            <when value="no_plot"/>
+            <when value="filtered_plot">
+                <param name="plotmeta" type="select" label="Plot counts per cell or genes per cell">
+                    <option value="total_counts">Total counts</option>
+                    <option value="total_genes">Total genes</option>
+                </param>
+                <param name="samples" type="text" optional="true" label="Optional subset of samples for distribution plotting (defaults to all)" />
+            </when>
+        </conditional>
+        <!-- Transformation method, always passed to the R script -->
+        <param name="transformation" type="select" label="Data Transformation">
+            <option value="log" selected="true">log</option>
+            <option value="sct">sct</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Files discovered in ./unfiltered_distribution_plots; created only when the raw data plot is requested -->
+        <collection name="raw_distribution_plot" type="list" label="Raw Data Distribution Plot">
+            <discover_datasets pattern="__name_and_ext__" directory="./unfiltered_distribution_plots" ext="png"/>
+            <filter>distribution_plots['plot'] == "raw_plot"</filter>
+        </collection>
+        <!-- Files discovered in ./filtered_distribution_plots; created only when the filtered data plot is requested -->
+        <collection name="filtered_dist_plot" type="list" label="Filtered Data Distribution Plot">
+            <discover_datasets pattern="__name_and_ext__" directory="./filtered_distribution_plots" ext="png"/>
+            <filter>filtered_distribution_plots['plot'] == "filtered_plot"</filter>
+        </collection>
+        <!-- Always produced: STobj.rds is picked up from the job working directory -->
+        <data name="STlist_obj" format="rds" label="STlist.rds" from_work_dir="STobj.rds"/>
+    </outputs>
+    <tests>
+        <!-- Test 1: single raw sample with every optional step left off; only the STlist RDS is expected -->
+        <test expect_num_outputs="1">
+            <conditional name="platform_type">
+                <param name="platform" value="raw_data"/>
+                <conditional name="raw_file_selection">
+                    <param name="raw_file_quantity" value="single_raw_input"/>
+                    <param name="single_raw_counts" value="ST_mel3_rep1_counts.tsv"/>
+                    <param name="single_raw_spotcoords" value="ST_mel3_rep1_mapping.tsv"/>
+                </conditional>
+                <param name="raw_metadata" ftype="csv" value="thrane_clinical.csv"/>
+            </conditional>
+            <output name="STlist_obj" file="STobj_raw.rds" compare="sim_size"/>
+        </test>
+        <!-- Test 2: same inputs plus the raw data distribution plot; RDS and one plot collection are expected -->
+        <test expect_num_outputs="2">
+            <conditional name="platform_type">
+                <param name="platform" value="raw_data"/>
+                <conditional name="raw_file_selection">
+                    <param name="raw_file_quantity" value="single_raw_input"/>
+                    <param name="single_raw_counts" value="ST_mel3_rep1_counts.tsv"/>
+                    <param name="single_raw_spotcoords" value="ST_mel3_rep1_mapping.tsv"/>
+                </conditional>
+                <param name="raw_metadata" ftype="csv" value="thrane_clinical.csv"/>
+            </conditional>
+            <conditional name="distribution_plots">
+                <param name="plot" value="raw_plot"/>
+                <param name="plotmeta" value="total_counts"/>
+            </conditional>
+            <output name="STlist_obj" file="STobj_raw.rds" compare="sim_size"/>
+            <output_collection name="raw_distribution_plot">
+                <element name="unfiltered_ST_mel3_rep1_counts" file="unfiltered_ST_mel3_rep1_counts.png" compare="sim_size"/>
+            </output_collection>
+        </test>
+        <!-- Test 3: raw plot, spot filtering (minimum 2000 reads) and filtered plot; RDS and both plot collections are expected -->
+        <test expect_num_outputs="3">
+            <conditional name="platform_type">
+                <param name="platform" value="raw_data"/>
+                <conditional name="raw_file_selection">
+                    <param name="raw_file_quantity" value="single_raw_input"/>
+                    <param name="single_raw_counts" value="ST_mel3_rep1_counts.tsv"/>
+                    <param name="single_raw_spotcoords" value="ST_mel3_rep1_mapping.tsv"/>
+                </conditional>
+                <param name="raw_metadata" ftype="csv" value="thrane_clinical.csv"/>
+            </conditional>
+            <conditional name="distribution_plots">
+                <param name="plot" value="raw_plot"/>
+                <param name="plotmeta" value="total_counts"/>
+            </conditional>
+            <conditional name="spot_filtering">
+                <param name="filter" value="filter"/>
+                <param name="spot_min_reads" value="2000"/>
+            </conditional>
+            <conditional name="filtered_distribution_plots">
+                <param name="plot" value="filtered_plot"/>
+                <param name="plotmeta" value="total_counts"/>
+            </conditional>
+            <output name="STlist_obj" file="STobj_filtered.rds" compare="sim_size"/>
+            <output_collection name="raw_distribution_plot">
+                <element name="unfiltered_ST_mel3_rep1_counts" file="unfiltered_ST_mel3_rep1_counts.png" compare="sim_size"/>
+            </output_collection>
+            <output_collection name="filtered_dist_plot">
+                <element name="filtered_ST_mel3_rep1_counts" file="filtered_ST_mel3_rep1_counts.png" compare="sim_size"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+    **What it does**
+
+    spatialGE is a tool designed for the analysis and visualization of spatially-resolved transcriptomics data.
+
+    spatialGE Preprocessing is built for data reorganization and filtering with exploratory analysis. Input data will be
+    transformed into an `STlist` object for downstream spatialGE analyses. Optional quality control can be performed by filtering spots/cells 
+    and genes to specific quantities. Distribution plots of either total counts or total genes can be produced for both raw and filtered data.
+    Data transformation will prepare the data for later analysis.
+
+    **Input**
+
+    Visium:
+
+    - Sample Name: Name of Visium sample(s) that matches a sample ID in the associated metadata file.
+
+    - Visium Files: All file outputs from `spaceranger count`, must include .h5 file and .csv file from `spatial` subdirectory, and optionally including the .png and .json files (one group of files per sample). For multiple samples, select option "Insert Visium Sample".
+
+    - Metadata: Metadata file including sample ID/names for all input samples.
+
+    CosMX-SMI:
+
+    - Expression Matrix: `exprMat` file from CosMX-SMI output. If running multiple sample input, upload collection of `exprMat` files, one file per sample.
+
+    - Metadata: `metadata` file from CosMX-SMI output. If running multiple sample input, upload collection of `metadata` files, one file per sample.
+
+    - Sample Names: Sample name associated with CosMX-SMI output. If running multiple sample input, create a comma-separated list of sample names with one unique name per sample.
+
+    Raw Data:
+
+    - Counts: Raw count data file(s). If running multiple sample input, upload collection of files, one file per sample.
+
+    - Spot Coordinates: Raw coordinate file(s). If running multiple sample input, upload collection of files, one file per sample.
+
+    - Metadata: Metadata file associated with sample(s).
+
+
+    **Run modes**
+
+    Optional: Generate Distribution Plot of Raw Data
+
+        - Display violin distribution plot of samples
+
+        - Choose between plotting either total counts per spot/cell or total genes per spot/cell
+
+        - Can manually enter specific sample names to subset plot (will automatically plot all provided samples)
+
+    Optional: Perform Quality Control with Spot Filtering
+
+        - Perform quality control by filtering spots/cells
+
+        - Optional input for all filtering parameters, can provide a quantity for one to all parameters
+
+        - Specifying minimum and maximum spots/cells and/or genes will restrict the data
+
+    Optional: Generate Distribution Plot of Filtered Data
+
+        - Display violin distribution plot of samples after filtering data
+
+        - Choose between plotting either total counts per spot/cell or total genes per spot/cell
+
+        - Can manually enter specific sample names to subset plot (will automatically plot all provided samples). If sample names were specified in `Generate Distribution Plot of Raw Data`, same samples will be subset for plotting after filtering.
+
+    **Outputs**
+
+    - STlist Object RDS: saves the STlist object as an .rds file for downstream spatialGE analyses
+
+    - Raw Data Distribution Plot: distribution violin plot of all samples provided, displaying either total counts or total genes
+
+    - Filtered Data Distribution Plot: similar to raw data distribution plot, displaying distribution after quality control filtering
+
+        ]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
author	goeckslab
date	Wed, 13 Aug 2025 19:32:05 +0000
parents
children