view garnett_train_classifier.xml @ 3:9be21507358d draft default tip

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 660486ba0e1ef2f0195e7c6f75448221ba4640f7"
author ebi-gxa
date Tue, 28 Apr 2020 11:33:58 -0400
parents 2067ebe66b55
children
line wrap: on
line source

<tool id="garnett_train_classifier" name="Garnett - train classifier" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
    <description>Train classifier based on marker gene list</description>
    <macros>
        <import>garnett_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Cheetah template that assembles the garnett_train_classifier.R call.
         Galaxy joins the rendered lines into a single shell command. -->
    <command detect_errors="exit_code"><![CDATA[
        ## Arguments that are always passed.
        garnett_train_classifier.R
            --cds-object '${cds_object}'
            --marker-file-path '${marker_file_path}'
            --database '${database}'
            --cores \${GALAXY_SLOTS:-1}
            --output-path '${output_classifier_path}'
        ## Free-text options are forwarded only when the field is not empty.
        #if $train_id
            --train-id '${train_id}'
        #end if
        #if $cds_gene_id_type
            --cds-gene-id-type '${cds_gene_id_type}'
        #end if
        #if $marker_file_gene_id_type
            --marker-file-gene-id-type '${marker_file_gene_id_type}'
        #end if
        ## Integer options: compare the rendered value with '' rather than testing
        ## truthiness, so that an explicit 0 is forwarded instead of being dropped.
        #if str($num_unknown) != ''
            --num-unknown '${num_unknown}'
        #end if
        #if str($min_observations) != ''
            --min-observations '${min_observations}'
        #end if
        #if str($max_training_samples) != ''
            --max-training-samples '${max_training_samples}'
        #end if
        ## NOTE(review): only emitted when the checkbox is ticked, and then with the
        ## rendered boolean value as its argument; confirm the script expects a value
        ## here rather than a bare flag.
        #if $propogate_markers
            --propogate-markers '${propogate_markers}'
        #end if
        #if $classifier_gene_id_type
            --classifier-gene-id-type '${classifier_gene_id_type}'
        #end if
    ]]></command>
    <inputs>
        <!-- Training data: the expression object and the marker gene definition file. -->
        <param type="data" name="cds_object" label="CDS object" format="rdata" help="CDS object with expression data for training" />
        <param type="data" name="marker_file_path" format="txt" label="Marker file path" help="File with marker genes specifying cell types. See https://cole-trapnell-lab.github.io/garnett/docs/#constructing-a-marker-file for specification of the file format" />
        <param type="text" name="train_id" label="Dataset ID" help="ID of the training dataset" />
        <!-- Gene identifier handling. -->
        <param type="text" name="database" label="gene database" value="org.Hs.eg.db" help="argument for Bioconductor AnnotationDb-class package used for converting gene IDs" />
        <param type="text" name="cds_gene_id_type" label="CDS gene id type" value="ENSEMBL" help="Format of the gene IDs in your CDS object" />
        <param type="text" name="marker_file_gene_id_type" label="Marker gene id type" value="ENSEMBL" help="Format of the gene IDs in your marker file" />
        <!-- Training parameters. Help texts are kept on one line because line breaks
             inside an attribute value are turned into runs of spaces by the parser. -->
        <param type="integer" name="num_unknown" value="500" label="Number of outgroups" help="Number of outgroups to compare against" />
        <param type="integer" name="min_observations" value="8" label="Min num of observations" help="The minimum number of representative cells per cell type required to include the cell type in the predictive model" />
        <param type="integer" name="max_training_samples" value="500" label="Max num of training samples" help="The maximum number of representative cells per cell type to be included in the model training. Decreasing this number increases speed, but may hurt performance of the model." />
        <!-- The spelling "propogate" matches the option name used in the command section; do not rename. -->
        <param type="boolean" name="propogate_markers" checked="true" label="propogate markers" help="Should markers from child nodes of a cell type be used in finding representatives of the parent type?" />
        <param type="text" name="classifier_gene_id_type" label="Classifier gene ID type" value="ENSEMBL" help="The type of gene ID that will be used in the classifier. If possible for your organism, this should be 'ENSEMBL', which is the default." />
    </inputs>
    <outputs>
        <!-- Single output dataset; its path is handed to the script as the output-path argument. -->
        <data format="rdata" name="output_classifier_path" />
    </outputs>
    <tests>
        <!-- Trains a classifier from the bundled CDS object and marker file, leaving
             every other parameter at its default. The expected output is a serialised
             binary object, so it is compared by file size rather than by a line diff. -->
        <test>
            <param name="cds_object" value="garnett_cds.rds" />
            <param name="marker_file_path" value="markers_upd.txt" />
            <output name="output_classifier_path" value="trained_classifier.rds" compare="sim_size" />
        </test>
    </tests>
    <!-- Help text consists solely of the @HELP@ and @VERSION_HISTORY@ tokens.
         NOTE(review): these are presumably defined in garnett_macros.xml; confirm. -->
    <help><![CDATA[
        @HELP@

        @VERSION_HISTORY@
    ]]></help>
    <expand macro="citations" />
</tool>