view construct_eset.xml @ 1:be91cb6f48e7 draft

"planemo upload for repository commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
author bgruening
date Fri, 26 Nov 2021 15:55:11 +0000
parents 2cfd0db49bbc
children 7902cd31b9b5
line wrap: on
line source

<tool id="music_construct_eset" name="Construct Expression Set Object" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      profile="20.05" license="GPL-3.0-or-later" >
    <description>Create an ExpressionSet object from tabular and textual data</description>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
Rscript --vanilla '$conf'
        <configfile name="conf" >


null_str_vec = function(gstr, is.str=FALSE){
   tokens = unlist(as.vector(strsplit(gstr, split=",")))
   if (length(tokens) == 0){
      if (is.str){
   if (length(tokens) == 1){

## Assay Data
## F rows of features and S columns of samples
## - This file is the only non-optional parameter
exprs_file = '$exprs_file'
exprs = as.matrix(read.table(exprs_file, header = T, sep = "\t",
                  row.names = 1, = T))
## Phenotype Data
## S rows of samples, and V columns of covariates (e.g. sex, age, etc.)
pdata = NULL
#if '$pdata_file':
pdata_file = '$pdata_file'
pdata = read.table(pdata_file, row.names = 1, header = T, sep = "\t",
#end if
## Annotation and Feature Data, or just a string for type of chip used
annotation = null_str_vec('$annotation')
if (is.null(annotation)){
   annotation = character(0)

if (all(rownames(pdata) != colnames(exprs))) {
   stop("Number of Samples between phenotypes and assays are not the same")

##metadata = data.frame(
##    labelDescription = c(
##        "Patient gender",
##        "Case/Control",
##        "Tumor progress"),
##    row.names = c("gender", "type", "score"))
metadata =, list(
#for i, row in enumerate($metadata):
      #if i==0
      list(ldesc='$row.label_desc', lname='$row.row_names')
      ,list(ldesc='$row.label_desc', lname='$row.row_names')
      #end if
#end for
## Finalise the variable metadata: copy the collected descriptions into
## labelDescription, use the collected labels as the row names, then drop
## the two helper entries (ldesc, lname) so only labelDescription remains.
## NOTE(review): the backslash before each dollar sign looks like the
## template escape needed for a literal R accessor -- confirm.
metadata\$labelDescription = unlist(metadata\$ldesc)
rownames(metadata) = unlist(metadata\$lname)
metadata\$ldesc = NULL
metadata\$lname = NULL

if (nrow(metadata)==0) {
    pheno_data = new("AnnotatedDataFrame", data = pdata)
} else {
    pheno_data = new("AnnotatedDataFrame", data = pdata, varMetadata = metadata)

## Experiment Description -- using the MIAME object
experiment_data = new(
    name = null_str_vec('$', is.str=T),
    lab = null_str_vec('$expdata.lab', is.str=T),
    contact = null_str_vec('$', is.str=T),
    title = null_str_vec('$expdata.title', is.str=T),
    abstract = null_str_vec('$expdata.abstract', is.str=T),
    url = null_str_vec('$expdata.url', is.str=T),
    other = list(
#for i, row in enumerate($expdata.other):
    #if i==0
    '$row.field' = null_str_vec('$row.comment', is.str=T)
    ,'$row.field' = null_str_vec('$row.comment', is.str=T)
    #end if
#end for

## Assemble the ExpressionSet from the assay matrix, the phenotype
## annotation, the experiment description and the annotation string
e_set = ExpressionSet(
    assayData = exprs,
    phenoData = pheno_data,
    experimentData = experiment_data,
    annotation = annotation
)

## Outputs: the serialised object itself, and its printed summary as text
saveRDS(e_set, file= '$out_rds')
capture.output(print(e_set), file = '$out_txt')


        <param name="exprs_file" label="Assay Data" type="data" format="tabular,tsv"
               help="F rows of features and S columns of samples. A tab-delimited file with a header line, whose first column contains the row names." />
        <param name="pdata_file" label="Phenotype Data" type="data"
               format="tabular,tsv" optional="true"
               help="S rows of samples, and V columns of covariates (e.g. sex, age)" />
        <param name="annotation" label="Annotation" type="text"
               optional="true" help="A string detailing which chip was used"  >
            <expand macro="validator_text_and_urls" />
        <repeat name="metadata" title="Meta Data" min="0" max="15" >
            <!-- optional, so min=0 -->
            <param name="row_names" label="Label" type="text"
                   help="Metadata should correspond directly to the columns of the Phenotype Data" >
                <expand macro="validator_text_and_urls" />
            <param name="label_desc" label="Label Description" type="text" >
                <expand macro="validator_text_and_urls" />
        <section name="expdata" title="Experiment Data" >
            <param name="name" label="Experimenter Name" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            <param name="lab" label="Lab" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            <param name="contact" label="Contact" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            <param name="title" label="Title" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            <param name="abstract" label="Abstract" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            <param name="url" label="URL" type="text" value="" optional="true" >
                <expand macro="validator_text_and_urls" />
            <repeat name="other" title="Other Fields" min="1">
                <param name="field" label="FieldName" type="text" value="notes" >
                    <expand macro="validator_text_and_urls" />
                <param name="comment" label="Comment" type="text" value="Created in Galaxy" >
                    <expand macro="validator_text_and_urls" />
        <data name="out_txt" format="txt" label="${} on ${on_string}: General Info" />
        <data name="out_rds" format="rdata.eset" label="${} on ${on_string}: RData ESet Object" />
        <test expect_num_outputs="2" >
            <!-- Values from the manual -->
            <param name="exprs_file" value="array.tsv" />
            <param name="pdata_file" value="pheno.tsv" />
            <param name="annotation" value="hgu95av2" />
            <repeat name="metadata" >
                <param name="row_names" value="gender" />
                <param name="label_desc" value="Patient gender" />
            <repeat name="metadata" >
                <param name="row_names" value="type" />
                <param name="label_desc" value="Case/control status" />
            <repeat name="metadata" >
                <param name="row_names" value="score" />
                <param name="label_desc" value="Tumor Progress on XYZ scale" />
            <section name="expdata">
                <param name="name" value="Pierre Fermat" />
                <param name="lab" value="Francis Galton Lab" />
                <param name="contact" value="pfermat@lab.not.exist" />
                <param name="title" value="Smoking-Cancer Experiment" />
                <param name="abstract" value="An example ExpressionSet" />
                <param name="url" value="www.lab.not.exist" />
                <repeat name="other" >
                    <param name="field" value="notes" />
                    <param name="comment" value="Created in Galaxy" />
                <repeat name="other" >
                    <param name="field" value="general" />
                    <param name="comment" value="Some other comment" />
            <output name="out_txt">
                    <has_text text="assayData: 3 features, 2 samples " />
Construct an ExpressionSet object from a variety of input attributes, such as experimentData, phenotype data, and annotations.

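For more options and information, consult `the manual <https://bioconductor.org/packages/release/bioc/vignettes/Biobase/inst/doc/ExpressionSetIntroduction.pdf>`_ and the `rdocumentation <https://www.rdocumentation.org/packages/Biobase/topics/ExpressionSet>`_.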
        <citation type="bibtex">
            title={An introduction to bioconductor’s expressionset class},
            author={Falcon, Seth and Morgan, Martin and Gentleman, Robert},