view construct_eset.xml @ 5:ada0d6224015 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit d5c7ca22af1d4f0eaa7a607886554bebb95e8c50
author bgruening
date Mon, 28 Oct 2024 17:32:30 +0000
parents 282819d09a4f
children 48f0fb3061b1
line wrap: on
line source

<tool id="music_construct_eset" name="Construct Expression Set Object" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      profile="21.09" license="GPL-3.0-or-later" >
    <description>Create an ExpressionSet object from tabular and textual data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Writes the rendered "conf" configfile (the R script below) to stderr,
         then runs it with Rscript. The two steps are chained, so the script
         only runs if the echo succeeded; errors are detected from the exit code. -->
    <command detect_errors="exit_code"><![CDATA[
cat '$conf' >> /dev/stderr &&
Rscript --vanilla '$conf'
]]></command>
    <configfiles>
        <configfile name="conf" >

suppressWarnings(suppressPackageStartupMessages(library(xbioc)))
suppressWarnings(suppressPackageStartupMessages(library(MuSiC)))

## Turn a comma-separated string into a character vector.
##   gstr   : the string to split on commas
##   is.str : what to hand back when the split yields nothing;
##            TRUE gives an empty character vector, FALSE gives NULL
## A single token is returned as a one-element character vector.
null_str_vec = function(gstr, is.str=FALSE){
   pieces = unlist(as.vector(strsplit(gstr, split=",")))
   n_pieces = length(pieces)
   if (n_pieces == 0){
      ## nothing survived the split
      if (is.str) return(character(0)) else return(NULL)
   }
   if (n_pieces == 1) pieces[[1]] else pieces
}

## Assay Data
## F rows of features and S columns of samples
## - This file is the only non-optional parameter
exprs_file = '$exprs_file'
exprs = as.matrix(read.table(exprs_file, header = TRUE, sep = "\t",
                  row.names = 1, as.is = TRUE, check.names = FALSE))
## Phenotype Data (optional)
## S rows of samples, and V columns of covariates (e.g. sex, age, etc.)
## pdata stays NULL when no phenotype dataset was selected. The guard tests
## the rendered value against the string None: a quoted placeholder is never
## an empty string, so testing it directly would always take the branch.
pdata = NULL
#if str($pdata_file) != 'None':
pdata_file = '$pdata_file'
pdata = read.table(pdata_file, row.names = 1, header = TRUE, sep = "\t",
                   as.is = TRUE, check.names = FALSE)
#end if
## Annotation and Feature Data, or just a string for type of chip used
annotation = null_str_vec('$annotation')
if (is.null(annotation)){
   annotation = character(0)
}

## Phenotype rows must line up one-to-one, and in the same order, with the
## assay columns. Counts are compared first so that the name comparison
## never recycles vectors of unequal length. Any single differing name is
## an error, not only the case where every name differs.
if (!is.null(pdata)) {
   if (nrow(pdata) != ncol(exprs)) {
      stop("Number of Samples between phenotypes and assays are not the same")
   }
   if (any(rownames(pdata) != colnames(exprs))) {
      stop("Sample names between phenotypes and assays are not the same")
   }
}

##metadata = data.frame(
##    labelDescription = c(
##        "Patient gender",
##        "Case/Control",
##        "Tumor progress"),
##    row.names = c("gender", "type", "score"))
## One list(ldesc, lname) entry per Meta Data repeat; the entries are
## row-bound and then reshaped into a data frame with a labelDescription
## column and the labels as row names. With no repeats the result has 0 rows.
metadata = as.data.frame(do.call(rbind, list(
#for i, row in enumerate($metadata):
      #if i==0
      list(ldesc='$row.label_desc', lname='$row.row_names')
      #else
      ,list(ldesc='$row.label_desc', lname='$row.row_names')
      #end if
#end for
)))
metadata\$labelDescription = unlist(metadata\$ldesc)
rownames(metadata) = unlist(metadata\$lname)
metadata\$ldesc = NULL
metadata\$lname = NULL

if (is.null(pdata)) {
    ## Variable metadata describes phenotype columns, so it cannot be
    ## attached when there is no phenotype table.
    if (nrow(metadata) != 0) {
        stop("Meta Data labels were given but no Phenotype Data was supplied")
    }
    ## No phenotype table: derive an empty one from the assay columns, which
    ## is the documented default for the phenoData argument of ExpressionSet.
    pheno_data = annotatedDataFrameFrom(exprs, byrow = FALSE)
} else if (nrow(metadata)==0) {
    pheno_data = new("AnnotatedDataFrame", data = pdata)
} else {
    pheno_data = new("AnnotatedDataFrame", data = pdata, varMetadata = metadata)
}


## Experiment Description -- using the MIAME object
## Every free-text field is passed through null_str_vec with is.str=TRUE, so
## an empty field becomes an empty character vector. The "other" list gets
## one named entry per Other Fields repeat; a separating comma is emitted
## before every entry except the first.
experiment_data = new(
    "MIAME",
    name     = null_str_vec('$expdata.name', is.str=TRUE),
    lab      = null_str_vec('$expdata.lab', is.str=TRUE),
    contact  = null_str_vec('$expdata.contact', is.str=TRUE),
    title    = null_str_vec('$expdata.title', is.str=TRUE),
    abstract = null_str_vec('$expdata.abstract', is.str=TRUE),
    url      = null_str_vec('$expdata.url', is.str=TRUE),
    other = list(
#for idx, entry in enumerate($expdata.other):
    #if idx != 0
    ,
    #end if
    '$entry.field' = null_str_vec('$entry.comment', is.str=TRUE)
#end for
))

## Assemble the ExpressionSet from the assay matrix, phenotype data,
## experiment description and annotation built earlier in this script
e_set = ExpressionSet(
    assayData = exprs,
    phenoData = pheno_data,
    experimentData = experiment_data,
    annotation = annotation
)

## Echo the printed summary to stdout and write the same summary to the text
## output; the object itself is serialised with saveRDS to the RDS output
print(e_set)
capture.output(print(e_set), file = '$out_txt')
saveRDS(e_set, file = '$out_rds')

        </configfile>
    </configfiles>
    <inputs>
        <param name="exprs_file" label="Assay Data" type="data" format="tabular,tsv"
               help="F rows of features and S columns of samples. A tab-delimited file with a header line; the first column holds the row names." />
        <param name="pdata_file" label="Phenotype Data" type="data"
               format="tabular,tsv" optional="true"
               help="S rows of samples, and V columns of covariates (e.g. sex, age)" />
        <param name="annotation" label="Annotation" type="text"
               optional="true" help="A string detailing which chip was used"  >
            <expand macro="validator_text_and_urls" />
        </param>
        <repeat name="metadata" title="Meta Data" min="0" max="15" >
            <!-- optional, so min=0 -->
            <param name="row_names" label="Label" type="text"
                   help="Metadata should correspond directly to the columns of the Phenotype Data" >
                <expand macro="validator_text_and_urls" />
            </param>
            <param name="label_desc" label="Label Description" type="text" >
                <expand macro="validator_text_and_urls" />
            </param>
        </repeat>
        <section name="expdata" title="Experiment Data" >
            <param name="name" label="Experimenter Name" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            </param>
            <param name="lab" label="Lab" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            </param>
            <param name="contact" label="Contact" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            </param>
            <param name="title" label="Title" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            </param>
            <param name="abstract" label="Abstract" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            </param>
            <param name="url" label="URL" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            </param>
            <repeat name="other" title="Other Fields" min="1">
                <param name="field" label="FieldName" type="text" value="notes" >
                    <expand macro="validator_text_and_urls" />
                </param>
                <param name="comment" label="Comment" type="text" value="Created in Galaxy" >
                    <expand macro="validator_text_and_urls" />
                </param>
            </repeat>
        </section>
    </inputs>
    <outputs>
        <data name="out_txt" format="txt" label="${tool.name} on ${on_string}: General Info" />
        <data name="out_rds" format="@RDATATYPE@" label="${tool.name} on ${on_string}: RData ESet Object" />
    </outputs>
    <tests>
        <test expect_num_outputs="2" >
            <!-- Simple object -->
            <param name="exprs_file" value="mouse_scrna_exprs.tabular" />
            <param name="pdata_file" value="mouse_scrna_pheno.tabular" />
            <output name="out_txt">
                <assert_contents>
                    <has_text text="assayData: 100 features, 100 samples " />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2" >
            <!-- Values from the manual -->
            <param name="exprs_file" value="array.tsv" />
            <param name="pdata_file" value="pheno.tsv" />
            <param name="annotation" value="hgu95av2" />
            <repeat name="metadata" >
                <param name="row_names" value="gender" />
                <param name="label_desc" value="Patient gender" />
            </repeat>
            <repeat name="metadata" >
                <param name="row_names" value="type" />
                <param name="label_desc" value="Case/control status" />
            </repeat>
            <repeat name="metadata" >
                <param name="row_names" value="score" />
                <param name="label_desc" value="Tumor Progress on XYZ scale" />
            </repeat>
            <section name="expdata">
                <param name="name" value="Pierre Fermat" />
                <param name="lab" value="Francis Galton Lab" />
                <param name="contact" value="pfermat@lab.not.exist" />
                <param name="title" value="Smoking-Cancer Experiment" />
                <param name="abstract" value="An example ExpressionSet" />
                <param name="url" value="www.lab.not.exist" />
                <repeat name="other" >
                    <param name="field" value="notes" />
                    <param name="comment" value="Created in Galaxy" />
                </repeat>
                <repeat name="other" >
                    <param name="field" value="general" />
                    <param name="comment" value="Some other comment" />
                </repeat>
            </section>
            <output name="out_txt">
                <assert_contents>
                    <has_text text="assayData: 3 features, 2 samples " />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Construct an ExpressionSet object from a variety of input attributes, such as experimentData, phenotype data, and annotations.

For more options and information, consult `the manual <http://www.bioconductor.org/packages/release/bioc/vignettes/Biobase/inst/doc/ExpressionSetIntroduction.pdf>`_ and the `rdocumentation <https://www.rdocumentation.org/packages/Biobase/versions/2.32.0/topics/ExpressionSet>`_.
    ]]></help>
    <citations>
        <citation type="bibtex">
            @misc{falcon2007introduction,
            title={An introduction to bioconductor’s expressionset class},
            author={Falcon, Seth and Morgan, Martin and Gentleman, Robert},
            year={2007}
            }
        </citation>
    </citations>
</tool>