changeset 0:95431f03a8fc draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 4990a52ac6fd3d97434a24051e5828c86c8a802c
author ebi-gxa
date Tue, 20 Aug 2024 12:48:39 +0000
parents
children c1207fb29b08
files get_test_data.sh macros.xml pyscenic_grn.xml
diffstat 3 files changed, 141 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_test_data.sh	Tue Aug 20 12:48:39 2024 +0000
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+TF_DATA_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/allTFs_hg38.txt'
+MOTIF2TF_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/motifs.tbl'
+RANKING_LINK='https://zenodo.org/records/13328724/files/genome-ranking_v2.feather'
+LOOM_INPUT_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/expr_mat.loom'
+# Zenodo-hosted tables named like tool results (presumably reference outputs for tests -- confirm)
+REGULONS_LINK='https://zenodo.org/records/13328724/files/regulons.tsv'
+TF2TARGETS_LINK='https://zenodo.org/records/13328724/files/tf2targets.tsv'
+
+function get_data {
+  local link=$1
+  local fname=$2
+  # Fetch $link into $fname unless the file exists; a failed download is removed so a rerun retries it.
+  if [ ! -f "$fname" ]; then
+    echo "$fname not available locally, downloading.."
+    wget -O "$fname" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link" || { rm -f "$fname"; return 1; }
+  fi
+}
+
+# Download every test fixture into test-data/; get_data skips files that already exist there.
+mkdir -p test-data
+pushd test-data || exit 1  # abort rather than download into the wrong directory
+get_data "$TF_DATA_LINK" "allTFs_hg38.txt"
+get_data "$MOTIF2TF_LINK" "motifs.tbl"
+get_data "$RANKING_LINK" "genome-ranking_v2.feather"
+get_data "$LOOM_INPUT_LINK" "expr_mat.loom"
+get_data "$REGULONS_LINK" "regulons.tsv"
+get_data "$TF2TARGETS_LINK" "tf2targets.tsv"
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Aug 20 12:48:39 2024 +0000
@@ -0,0 +1,15 @@
+<macros>
+    <!-- pySCENIC release wrapped by the importing tools; also used as the Docker image tag below. -->
+    <token name="@TOOL_VERSION@">0.12.1</token>
+    <xml name="requirements">
+        <requirements>
+            <!-- Image reference kept on one line so the identifier carries no surrounding whitespace. -->
+            <container type="docker">aertslab/pyscenic:@TOOL_VERSION@</container>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/nmeth.4463</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyscenic_grn.xml	Tue Aug 20 12:48:39 2024 +0000
@@ -0,0 +1,98 @@
+<tool id="pyscenic_grn" name="PySCENIC GRN" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+    <description>infers gene regulatory networks</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+        <![CDATA[
+            ln -s '${expression_mtx}' expr_mat.loom &&
+            ln -s '${tfs_fname}' tfs.txt &&
+            #if $use_arboretum
+                arboreto_with_multiprocessing.py
+            #else
+                pyscenic grn
+            #end if
+            -o tf2targets.tsv
+            $transpose
+            -m '${method}'
+            ## An unset optional integer renders as an empty string; comparing the string
+            ## form keeps a seed of 0 from being dropped as if it were unset.
+            #if str($seed) != ''
+            --seed '${seed}'
+            #end if
+            --num_workers \${GALAXY_SLOTS:-1}
+            #if $cell_id_attribute
+            --cell_id_attribute '${cell_id_attribute}'
+            #end if
+            #if $gene_attribute
+            --gene_attribute '${gene_attribute}'
+            #end if
+            $sparse
+            ## Positional arguments: expression matrix first, then the transcription factor list.
+            expr_mat.loom tfs.txt &&
+            mv tf2targets.tsv '${tf2targets}'
+        ]]>
+    </command>
+    <inputs>
+        <param name="expression_mtx" type="data" format="loom" label="Expression Matrix Loom File" help="In format rows=genes x columns=cells"/>
+        <param name="tfs_fname" type="data" format="txt" label="Transcription Factors File" help="Simple text file, one transcription factor symbol per line"/>
+        <param name="use_arboretum" type="boolean" checked="false" label="Use arboretum" help="Runs arboreto_with_multiprocessing.py instead of pyscenic grn, which can be better for multiprocessing"/>
+        <param name="transpose" type="boolean" truevalue="-t" falsevalue="" label="Transpose Expression Matrix" help="Use this if the matrix is cell x genes instead of genes x cells as expected"/>
+        <param name="method" type="select" label="Method">
+            <option value="genie3">GENIE3</option>
+            <option value="grnboost2" selected="true">GRNBoost2</option>
+        </param>
+        <param name="cell_id_attribute" type="text" optional="true" label="Cell ID Attribute"/>
+        <param name="gene_attribute" type="text" optional="true" label="Gene Attribute"/>
+        <param name="sparse" type="boolean" truevalue="--sparse" falsevalue="" label="Sparse Matrix" help="If set, load the expression data as a sparse matrix. Currently applies to the grn inference step only."/>
+        <param name="seed" type="integer" optional="true" label="Seed"/>
+    </inputs>
+    <outputs>
+        <data name="tf2targets" format="tabular" label="${tool.name} on ${on_string}: gene regulatory network"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"><!-- default path: pyscenic grn -->
+            <param name="expression_mtx" value="expr_mat.loom"/>
+            <param name="tfs_fname" value="allTFs_hg38.txt"/>
+            <param name="seed" value="1"/>
+            <output name="tf2targets" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="1006973"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1"><!-- alternative path: arboreto_with_multiprocessing.py -->
+            <param name="expression_mtx" value="expr_mat.loom"/>
+            <param name="tfs_fname" value="allTFs_hg38.txt"/>
+            <param name="use_arboretum" value="true"/>
+            <param name="seed" value="1"/>
+            <output name="tf2targets" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="1006973"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        This tool runs the ``pyscenic grn`` command (or ``arboreto_with_multiprocessing.py`` when *Use arboretum* is enabled) to infer gene regulatory networks.
+
+        **Inputs:**
+
+        - Expression Matrix File: Loom file containing the expression matrix (rows=genes x columns=cells).
+        - Transcription Factors File: TXT file with a list of transcription factors.
+        
+        **Options:**
+
+        - Output: The inferred network is returned as a tab-separated table (written by the command as ``tf2targets.tsv``).
+        - Transpose Expression Matrix: If selected, transpose the expression matrix.
+        - Method: Algorithm for gene regulatory network reconstruction (default: GRNBoost2).
+        - Seed: Seed value for random state initialization.
+        - Number of Workers: Number of workers to use for computation; taken from the job's ``GALAXY_SLOTS`` (1 if unset), not set by the user.
+        - Client or Address: Client or IP address of the dask scheduler; not exposed by this wrapper.
+        - Cell ID Attribute: Column attribute for cell identifiers in the loom file.
+        - Gene Attribute: Row attribute for gene symbols in the loom file.
+        - Load as Sparse Matrix: Load the expression data as a sparse matrix.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>