view scanpy-normalise-data.xml @ 24:439f7f939d46 draft default tip

"planemo upload for repository commit 9ea121884e1aecd2ee66a0686057cb3ed904b9eb-dirty"
author ebi-gxa
date Thu, 28 Oct 2021 09:59:09 +0000
parents 3a4564b9d685
line wrap: on
line source

<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_normalise_data" name="Scanpy NormaliseData" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
  <description>to make all cells having the same total expression</description>
  <expand macro="requirements"/>
  <command detect_errors="exit_code"><![CDATA[
## Expose the selected input dataset under a fixed local file name.
ln -s '${input_obj_file}' input.h5 &&
PYTHONIOENCODING=utf-8 scanpy-normalise-data
    ## Custom options are emitted only when "Use programme defaults" is unchecked.
    #if not $settings.default
        ## "Apply log transform?" unchecked: pass the opt-out flag declared on the
        ## parameter. This branch used to be empty, so the choice was silently ignored.
        #if not $settings.log_transform
            --no-log-transform
        #end if
        #if $settings.scale_factor
            --normalize-to '${settings.scale_factor}'
        #end if
        ## The "layers" input declares argument="--layers" but was never forwarded.
        #if $settings.layers
            --layers '${settings.layers}'
        #end if
        ## NOTE(review): "layer_norm" is declared as an input, but no CLI flag for it is
        ## visible in this file, so it is still not forwarded - confirm the flag name.
        #if $settings.key_added
            --key-added '${settings.key_added}'
        #end if
        ## max_fraction only exists in the "true" branch of the "exclude" conditional,
        ## so it is read only inside this guard.
        #if $settings.exclude.exclude_highly_expressed
            --exclude-highly-expressed --max-fraction '${settings.exclude.max_fraction}'
        #end if
    #end if
    ## NOTE(review): no input/output options are visible in this view of the
    ## template - TODO confirm whether macro tokens are expected here.
]]></command>

    <inputs>
      <!-- Shared input/output parameters; their definitions are not visible in this file. -->
      <expand macro="input_object_params"/>
      <expand macro="output_object_params"/>
      <expand macro="export_mtx_params"/>
      <expand macro="save_matrix_params"/>
      <!-- Normalisation settings: the "false" branch exposes every tunable option;
           the "true" branch is empty. -->
      <conditional name="settings">
        <param name="default" type="boolean" checked="true" label="Use programme defaults"/>
        <when value="true"/>
        <when value="false">
          <param name="scale_factor" argument="--normalize-to" type="float" value="1e4" min="0"
                 label="Target number to normalise to" help="Aimed counts per cell after normalisation."/>
          <param name="log_transform" argument="--no-log-transform" type="boolean" truevalue="" falsevalue="--no-log-transform" checked="true"
                 label="Apply log transform?" help="If enabled, will apply a log transformation following normalisation."/>
          <!-- max_fraction is only offered when highly expressed genes are excluded. -->
          <conditional name="exclude">
            <param name="exclude_highly_expressed" argument="--exclude-highly-expressed" type="boolean" checked="false"
                   label="Exclude highly expressed genes?" help="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell. A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to the number specified by --normalize-to."/>
            <when value="true">
              <param name="max_fraction" argument="--max-fraction" type="float" value="0.05" min="0" max="1"
                     label="Consider genes as highly expressed that have more counts than max_fraction of the original total counts in at least one cell."/>
            </when>
            <when value="false"/>
          </conditional>
          <param name="layers" argument="--layers" type="text" optional="true"
                 label="Comma-separated list of layers to normalize. Set to 'all' to normalize all layers."/>
          <param name="layer_norm" type="select" label="How to normalise layers" help="If None, after normalization, for each layer in layers each cell has a total count equal to the median of the counts_per_cell before normalization of the layer. If 'after', for each layer in layers each cell has a total count equal to the value of --normalize-to. If 'X', for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.">
            <option value="" selected="true">None</option>
            <option value="X">X</option>
            <option value="after">after</option>
          </param>
          <param name="key_added" argument="--key-added" type="text" optional="true"
                 label="Name of the field in adata.obs where the normalization factor is stored. Default: don't store."/>
        </when>
      </conditional>
    </inputs>

    <outputs>
      <!-- Output datasets come from shared macros; the first is labelled "Normalised data". -->
      <expand macro="output_data_obj" description="Normalised data"/>
      <expand macro="export_mtx_outputs"/>
    </outputs>

      <tests>
        <!-- Single functional test: normalise filter_genes.h5 and compare the result
             with normalise_data.h5 by size (compare="sim_size"). -->
        <test>
          <param name="input_obj_file" value="filter_genes.h5"/>
          <param name="input_format" value="anndata"/>
          <param name="output_format" value="anndata"/>
          <!-- NOTE(review): scale_factor sits in the "false" branch of the "settings"
               conditional, and this test does not set "default" - confirm it takes effect. -->
          <param name="scale_factor" value="1e4"/>
          <param name="save_raw" value="false"/>
          <output name="output_h5" file="normalise_data.h5" ftype="h5" compare="sim_size"/>
        </test>
      </tests>

  <help><![CDATA[
Normalise total counts per cell (`scanpy.pp.normalize_total`)

Normalise each cell by total counts over all genes (excluding top expressed
genes if so required), so that every cell has the same total count after
normalisation.

Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
]]></help>


  <expand macro="citations"/>