view scanpy-normalise-data.xml @ 19:3a4564b9d685 draft

"planemo upload for repository commit ebe77c8718ec65277f4dc0d71fa5f4c5677df62d-dirty"
author ebi-gxa
date Wed, 05 May 2021 12:11:45 +0000
parents 6aa97861fdfd
children 6b97ffba31da
line wrap: on
line source

<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_normalise_data" name="Scanpy NormaliseData" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
  <description>to make all cells have the same total expression</description>
  <!-- NOTE(review): the macros expanded below and the @...@ tokens used in this tool are not
       defined in this file; a macros import is presumably required here. Confirm the file name. -->
  <expand macro="requirements"/>
  <command detect_errors="exit_code"><![CDATA[
## Link the input dataset to the relative path input.h5.
ln -s '${input_obj_file}' input.h5 &&
PYTHONIOENCODING=utf-8 scanpy-normalise-data
    ## Custom settings are only read when "Use programme defaults" is unchecked.
    #if not $settings.default
        ## Unchecking "Apply log transform?" has to switch the transform off explicitly.
        #if not $settings.log_transform
            --no-log-transform
        #end if
        #if $settings.scale_factor
            --normalize-to '${settings.scale_factor}'
        #end if
        #if $settings.key_added
            --key-added '${settings.key_added}'
        #end if
        #if $settings.exclude.exclude_highly_expressed
            --exclude-highly-expressed --max-fraction '${settings.exclude.max_fraction}'
        #end if
        #if $settings.layers
            --layers '${settings.layers}'
        #end if
        ## The "None" option carries an empty value and must not produce a flag.
        #if str($settings.layer_norm)
            --layer-norm '${settings.layer_norm}'
        #end if
    #end if
    ## NOTE(review): no input/output options are passed to scanpy-normalise-data here. If the
    ## shared macros provide command tokens matching the input_object_params,
    ## output_object_params, export_mtx_params and save_matrix_params inputs, they belong
    ## here - confirm against the macros file.
]]></command>

  <inputs>
    <expand macro="input_object_params"/>
    <expand macro="output_object_params"/>
    <expand macro="export_mtx_params"/>
    <expand macro="save_matrix_params"/>
    <!-- Custom settings; the nested parameters only exist when "default" is unchecked. -->
    <conditional name="settings">
      <param name="default" type="boolean" checked="true" label="Use programme defaults"/>
      <when value="true"/>
      <when value="false">
        <param name="scale_factor" argument="--normalize-to" type="float" value="1e4" min="0"
               label="Target number to normalise to" help="Aimed counts per cell after normalisation."/>
        <!-- Inverted flag: the checked state renders as an empty string, the unchecked state as the flag. -->
        <param name="log_transform" argument="--no-log-transform" type="boolean" truevalue="" falsevalue="--no-log-transform" checked="true"
               label="Apply log transform?" help="If enabled, will apply a log transformation following normalisation."/>
        <conditional name="exclude">
          <param name="exclude_highly_expressed" argument="--exclude-highly-expressed" type="boolean" checked="false"
                 label="Exclude highly expressed genes?" help="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell. A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to the number specified by --normalize-to."/>
          <when value="true">
            <param name="max_fraction" argument="--max-fraction" type="float" value="0.05" min="0" max="1"
                   label="Consider cells as highly expressed that have more counts than max_fraction of the original total counts in at least one cell."/>
          </when>
          <when value="false"/>
        </conditional>
        <param name="layers" argument="--layers" type="text" optional="true"
               label="Comma-separated list of layers to normalize. Set to 'all' to normalize all layers."/>
        <param name="layer_norm" type="select" label="How to normalise layers" help="If None, after normalization, for each layer in layers each cell has a total count equal to the median of the counts_per_cell before normalization of the layer. If 'after', for each layer in layers each cell has a total count equal to the value of --normalize-to. If 'X', for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.">
          <option value="" selected="true">None</option>
          <option value="X">X</option>
          <option value="after">after</option>
        </param>
        <param name="key_added" argument="--key-added" type="text" optional="true"
               label="Name of the field in adata.obs where the normalization factor is stored. Default: don't store."/>
      </when>
    </conditional>
  </inputs>

  <outputs>
    <expand macro="output_data_obj" description="Normalised data"/>
    <expand macro="export_mtx_outputs"/>
  </outputs>

  <tests>
    <test>
      <param name="input_obj_file" value="filter_genes.h5"/>
      <param name="input_format" value="anndata"/>
      <param name="output_format" value="anndata"/>
      <!-- NOTE(review): scale_factor lives in the settings branch that is only active when
           "default" is unchecked; this test leaves "default" at its checked state, so the
           value below may have no effect. Confirm the intent. -->
      <param name="scale_factor" value="1e4"/>
      <param name="save_raw" value="false"/>
      <output name="output_h5" file="normalise_data.h5" ftype="h5" compare="sim_size"/>
    </test>
  </tests>

  <help><![CDATA[
=============================================================
Normalise total counts per cell (`scanpy.pp.normalize_total`)
=============================================================

Normalise each cell by total counts over all genes (excluding top expressed
genes if so required), so that every cell has the same total count after
normalisation.

Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
]]></help>

  <expand macro="citations"/>
</tool>