changeset 0:51816af2f7ac draft default tip

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit a1ad1ddd9b8e4db5bb82c3accae8311e0e488b19"
author ebi-gxa
date Fri, 27 Nov 2020 13:37:59 +0000
parents
children
files ct_downsample_cells.xml ct_macros.xml
diffstat 2 files changed, 125 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ct_downsample_cells.xml	Fri Nov 27 13:37:59 2020 +0000
@@ -0,0 +1,60 @@
+<tool id="ct_downsample_cells" name="Cell types - Downsample Cells" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>to avoid memory issues</description>
+    <macros>
+        <import>ct_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!--
+        Command: link the matrix, gene and barcode inputs into the working directory under
+        the file names matrix.mtx, genes.tsv and barcodes.tsv, then run downsample_cells.R
+        on the current directory.
+
+        The sampled files are written to the fixed directory 10x_data_sampled. Galaxy does
+        not evaluate Cheetah variables in the from_work_dir attribute of an output, so the
+        directory name used here must be the same literal string that the outputs section
+        refers to. The optional exclusions flag is appended only when a file was supplied.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        ln -s "${matrix}" matrix.mtx &&
+        ln -s "${genes}" genes.tsv &&
+        ln -s "${barcodes}" barcodes.tsv &&
+        downsample_cells.R
+            --expression-data .
+            --metadata "${metadata}"
+            --cell-id-field "${cell_id_field}"
+            --cell-type-field "${cell_type_field}"
+            --array-size-limit "${array_size_limit}"
+            --output-dir 10x_data_sampled
+            --metadata-upd "${metadata_upd}"
+            #if $exclusions
+            --exclusions "${exclusions}"
+            #end if
+    ]]></command>
+    <inputs>
+        <param name="matrix" type="data" format="txt" label="Expression matrix in sparse matrix format (.mtx)"/>
+        <param name="genes" type="data" format="tsv,tabular" label="Gene table"/>
+        <param name="barcodes" type="data" format="tsv,tabular" label="Barcode/cell table"/>
+        <param type="data" name="metadata" format="txt" label="Metadata Table" help="Metadata file mapping cells to cell types" />
+        <param type="text" name="cell_id_field" label="Cell ID Field" optional="true" value="id" help="Cell ID field">
+            <expand macro="sanitize_strings" />
+        </param>
+        <param type="text" name="cell_type_field" label="Cell Type Field" optional="true" value="inferred cell type" help="Name of cell type column in metadata file">
+            <expand macro="sanitize_strings" />
+        </param>
+        <param type="data" name="exclusions" label="Exclusions file" format="yml" optional="true" help="YAML file with trivial terms and unlabelled cells" />
+        <param type="integer" name="array_size_limit" value="2000000000" min="1" label="Array Size Limit" help="Maximum matrix size (product of genes and cells). Given the number of genes in the dataset, this determines how many cells to down-sample to. Warning: do not exceed the default value." />
+        <!-- Still accepted so that saved workflows and API calls keep working; the command no longer reads it. -->
+        <param type="text" name="output_dir" value="10x_data_sampled" label="Sampled Data" help="Name of the directory for sampled data. Retained for compatibility with existing workflows: the sampled files are always written to a fixed working directory and returned as the outputs of this tool, so this value does not affect the results." />
+    </inputs>
+    <!-- The three 10x files are collected from the fixed directory written by the command above. -->
+    <outputs>
+        <data name="matrix_upd" format="txt" from_work_dir="10x_data_sampled/matrix.mtx" label="${tool.name} on ${on_string}: matrix" />
+        <data name="barcodes_upd" format="tabular" from_work_dir="10x_data_sampled/barcodes.tsv" label="${tool.name} on ${on_string}: barcodes" />
+        <data name="genes_upd" format="tabular" from_work_dir="10x_data_sampled/genes.tsv" label="${tool.name} on ${on_string}: genes" />
+        <data name="metadata_upd" format="txt" label="Updated metadata" />
+    </outputs>
+    <tests>
+        <!--
+            NOTE(review): sim_size with a delta of 10000000 bytes accepts almost any output
+            size, including an empty file. Consider tightening the delta once the expected
+            file sizes have been confirmed.
+        -->
+        <test>
+            <param name="matrix" value="10x_data/matrix.mtx" />
+            <param name="genes" value="10x_data/genes.tsv"  />
+            <param name="array_size_limit" value="3955000" />
+            <param name="cell_type_field" value="inferred cell type - authors labels"  />
+            <param name="barcodes" value="10x_data/barcodes.tsv" />
+            <param name="metadata" value="E-MTAB-7249_unmelted_sdrf.tsv"  />
+            <output name="matrix_upd" file="10x_data_upd/matrix.mtx" compare="sim_size" delta="10000000" />
+            <output name="barcodes_upd" file="10x_data_upd/barcodes.tsv" compare="sim_size" delta="10000000" />
+            <output name="genes_upd" file="10x_data_upd/genes.tsv" compare="sim_size" delta="10000000" />
+            <output name="metadata_upd" file="E-MTAB-7249_unmelted_sdrf_downsampled.tsv" compare="sim_size" delta="10000000"/>
+        </test>
+    </tests>
+
+    <!-- Help text is reStructuredText; it starts at column 0 so it is not rendered as a block quote. -->
+    <help><![CDATA[
+@HELP@
+
+@VERSION_HISTORY@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ct_macros.xml	Fri Nov 27 13:37:59 2020 +0000
@@ -0,0 +1,65 @@
+<macros>
+    <!--
+        Tokens substituted into the tool file wherever the @NAME@ placeholder appears.
+        NOTE(review): @TOOL_VERSION@ is 1.1.1 while the requirements macro in this file pins
+        cell-types-analysis 0.1.9; confirm which version is intended.
+    -->
+    <token name="@TOOL_VERSION@">1.1.1</token>
+    <!-- Short help text placed at the top of the tool help section. -->
+    <token name="@HELP@">More information can be found at https://github.com/ebi-gene-expression-group/cell-types-analysis</token>
+    <!-- Value used for the profile attribute of the tool element. -->
+    <token name="@PROFILE@">18.01</token>
+    <!-- Package requirements shared by the tools; the yield lets a tool append requirements of its own. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.1.9">cell-types-analysis</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <!--
+        Version command: prints the version of the cell-types-analysis package as listed
+        by conda. The pattern is single-quoted so the shell passes it to grep unchanged
+        (no glob expansion, backslashes kept), and each component may have several digits.
+    -->
+    <xml name="version">
+      <version_command><![CDATA[
+        conda list | grep cell-types-analysis | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+'
+    ]]></version_command>
+    </xml>
+    <!-- Version history in reStructuredText, newest entry first; the blank line and list markers keep the entries on separate lines. -->
+    <token name="@VERSION_HISTORY@"><![CDATA[
+**Version history**
+
+- 0.1.6+galaxy0: Add removing non-alphanumeric characters from labels in metadata files
+- 0.0.5+galaxy1: Standardise input/output format into .txt.
+- 0.0.5+galaxy0: Initial contribution. Andrey Solovyev, Expression Atlas team https://www.ebi.ac.uk/gxa/home at EMBL-EBI https://www.ebi.ac.uk/.
+    ]]></token>
+    <!-- Reusable select parameter for the semantic similarity metric; "lin" is preselected. -->
+    <xml name="sem_siml_options">
+        <param name="sem_siml_metric"
+               type="select"
+               label="Semantic similarity metric"
+               help="What semantic similarity metric should be used? NB: if include-sem-siml is set to True, make sure to use a metric that is in the [0;1] interval. See https://www.bioconductor.org/packages/release/bioc/html/Onassis.html for more detail.">
+            <option value="lin" selected="true">lin</option>
+            <option value="jaccard">jaccard</option>
+            <option value="jc_norm">jc_norm</option>
+            <option value="schlicker">schlicker</option>
+            <option value="edge_resnik">edge_resnik</option>
+            <option value="sim">sim</option>
+            <option value="edge_leachod">edge_leachod</option>
+            <option value="edge_slimani">edge_slimani</option>
+            <option value="edge_wupalmer">edge_wupalmer</option>
+            <option value="edge_rada_lca">edge_rada_lca</option>
+            <option value="edge_li">edge_li</option>
+            <option value="resnik">resnik</option>
+        </param>
+    </xml>
+    <!--
+        Sanitizer for free-text parameters: keeps ASCII letters, digits, underscore, hyphen,
+        square brackets and space; any other character is replaced by the empty string.
+    -->
+    <xml name="sanitize_strings">
+        <sanitizer invalid_char="">
+            <valid initial="string.ascii_letters,string.digits">
+                <add value="_" />
+                <add value="-" />
+                <add value="[" />
+                <add value="]" />
+                <add value=" " />
+            </valid>
+        </sanitizer>
+    </xml>
+    <!--
+        Citations shared by the tools; the yield lets a tool append citations of its own.
+        In the BibTeX entry the two authors are joined with "and", and the team name is
+        wrapped in braces so that BibTeX treats it as one corporate author rather than
+        splitting it into first and last names.
+    -->
+    <xml name="citations">
+      <citations>
+        <citation type="bibtex">
+          @misc{github-cell-types-analysis.git,
+            author = {Andrey Solovyev and {EBI Gene Expression Team}},
+            year = {2020},
+            title = {Suite of scripts for analysis of scRNA-seq cell type classification tool outputs},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            url = {https://github.com/ebi-gene-expression-group/cell-types-analysis.git},
+          }
+        </citation>
+        <citation type="doi">10.1101/2020.04.08.032698</citation>
+        <yield />
+      </citations>
+    </xml>
+</macros>