view build_matrix.xml @ 0:1bf008d6d54e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5
author iuc
date Tue, 18 Apr 2023 13:19:11 +0000
parents
children 31a21ba2c5ea
line wrap: on
line source

<tool id="episcanpy_build_matrix" name="Build count matrix" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short description of the tool. -->
    <description>with EpiScanpy</description>
    <!-- Shared definitions are imported from macros.xml (not shown in this file).
         The TOOL_VERSION, VERSION_SUFFIX, PROFILE and CMD_* tokens used in this
         wrapper are presumably declared there; confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expand the macros named bio_tools, requirements and version_command;
         their contents are defined outside this file. -->
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- Job command line. The steps are chained with the shell AND operator, so a
         failing step stops the chain; errors are detected from the exit code.
           1. Compress the fragments input with bgzip into fragments.gz and index it
              with tabix using the bed preset.
           2. Copy the rendered script_file into the hidden_output log, then run the
              script with python and append its stdout to the same log.
           3. Make sure anndata_info.txt exists (touch) and print it, followed by the
              text substituted for the CMD_prettify_stdout token.
         NOTE(review): nothing visible in this file writes anndata_info.txt; presumably
         the code injected through the CMD_imports token does. Confirm in macros.xml. -->
    <command detect_errors="exit_code"><![CDATA[
bgzip -c '$fragment_file' > fragments.gz &&
tabix -p bed fragments.gz &&
cat '$script_file' > '$hidden_output' &&
python '$script_file' >> '$hidden_output' &&
touch 'anndata_info.txt' &&
cat 'anndata_info.txt' @CMD_prettify_stdout@
      ]]></command>
    <!-- Python script rendered by Cheetah and executed by the command section.
         It loads the peaks file with esc.ct.load_peaks, calls esc.ct.norm_peaks
         with the requested extension, and calls esc.ct.bld_mtx_bed on the
         bgzip-compressed fragments, saving the result to anndata.h5ad.
         The chromosomes argument is either the selected preset name ('human' or
         'mouse') or, for the custom choice, a Python list built from the
         comma-separated text input. Entries are stripped of whitespace, and empty
         entries (caused by a trailing or doubled comma) are dropped so that no
         empty string is passed on as a chromosome id. -->
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
peaks = esc.ct.load_peaks('$peaks_file')
esc.ct.norm_peaks(peaks, extension=$extension)
esc.ct.bld_mtx_bed(
    fragment_file='fragments.gz',
    feature_region=peaks,
    #if $reference_chr.chr_select == 'custom'
        #set $chromosomes = ([x.strip() for x in str($reference_chr.chromosomes).split(',') if x.strip()])
        chromosomes=$chromosomes,
    #else
        chromosomes='$reference_chr.chr_select',
    #end if
    save='anndata.h5ad')
]]></configfile>
    </configfiles>
    <!-- Tool inputs:
           fragment_file  BED dataset with the ATAC fragments.
           peaks_file     tabular dataset with the features (peaks BED or MACS2 narrowPeak).
           extension      non-negative number of bases passed to the script as the peak extension.
           reference_chr  chromosome set: the human or mouse preset, or a custom
                          comma-separated list of ids entered by the user.
         The custom list must not be empty, otherwise the script would receive a
         chromosome list with no usable ids. -->
    <inputs>
        <param name="fragment_file" type="data" format="bed" label="ATAC fragments file" />
        <param name="peaks_file" type="data" format="tabular" label="Features file" help="Peaks BED file or MACS2 narrowPeak file"/>
        <param name="extension" type="integer" value="0" min="0" label="Number of bases to extend both sides of peaks" />
        <conditional name="reference_chr">
            <param name="chr_select" type="select" label="Select the chromosomes of the species you are considering" >
                <option value="human">Human chromosomes ['1', '2', '3', ... , '22', 'X', 'Y']</option>
                <option value="mouse">Mouse chromosomes ['1', '2', '3', ... , '19', 'X', 'Y']</option>
                <option value="custom">Custom list of chromosomes</option>
            </param>
            <when value="human" />
            <when value="mouse" />
            <when value="custom">
                <param name="chromosomes" value="" type="text" label="Enter comma separated list of chromosome ids (without chr prefix)" >
                    <expand macro="sanitize_query" />
                    <!-- Reject an empty list up front instead of failing inside the script. -->
                    <validator type="empty_field" message="Please enter at least one chromosome id"/>
                </param>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <!-- Job outputs: the count matrix picked up from anndata.h5ad in the working
         directory, plus a log dataset that is hidden in the history and only
         produced when the show_log option of advanced_common is set. -->
    <outputs>
        <data name="anndata_out"
              format="h5ad"
              from_work_dir="anndata.h5ad"
              label="${tool.name} on ${on_string}: Annotated data matrix"/>
        <data name="hidden_output"
              format="txt"
              label="Log file"
              hidden="true">
            <filter>advanced_common['show_log']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Count matrix built from the chrY fixtures with a custom chromosome list
             ("Y") and the log output enabled, hence two expected outputs. -->
        <test expect_num_outputs="2">
            <param name="fragment_file" value="chrY.fragments.bed"/>
            <param name="peaks_file" value="chrY.peaks.bed"/>
            <conditional name="reference_chr">
                <param name="chr_select" value="custom"/>
                <param name="chromosomes" value="Y"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="anndata_out"
                    file="chrY.h5ad"
                    ftype="h5ad"
                    compare="sim_size"/>
        </test>
    </tests>
  <help>
.. class:: infomark

**What it does**

Builds a single-cell ATAC-seq count matrix in AnnData format.

-----

**Inputs**

- ATAC fragments file (BED) containing the positions of Tn5 integration sites, the cell barcode that the DNA fragment originated from, and the number of times the fragment was sequenced. An example::

    chrY	2650256	2650533	GACCAATGTCCGTAGC	1
    chrY	2650420	2650463	TGACAACGTACTTCAG	1
    chrY	2650444	2650643	GTGGATTGTACAAGCG	3
    chrY	2650639	2650990	ATAGGCTAGGGCTCTC	2
    chrY	2650650	2650692	GACTAACAGCAACGGT	1
    chrY	2650699	2650942	TCAAAGCTCAAAGTAG	1
    chrY	2650768	2650809	TTGTTGTAGGGCATTG	2
    chrY	2650841	2650873	TTGTTGTAGGGCATTG	1
    chrY	2650957	2650995	GACTAACAGCAACGGT	1
    chrY	2651205	2651265	TCAAAGCTCAAAGTAG	1
    chrY	2651215	2651268	TCACAAGGTCAAGACG	1

- Features file. A plain BED file with peak locations or narrowPeak file from MACS2.

**Output**

- Count matrix in AnnData format (h5ad).

  </help>
    <expand macro="citations"/>
</tool>