view vsnp_sample_names.xml @ 0:f75e2ac7b6cd draft

author greg
date Tue, 21 Apr 2020 10:17:20 -0400
children 895d18fcfebe
line wrap: on
line source

<tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0">
    <command detect_errors="exit_code"><![CDATA[
## Derives a sample name from each input's element identifier and writes it to
## a text file: the single "output" dataset when one file is selected, or one
## file per collection element inside $output_dir (picked up afterwards by the
## discover_datasets rule that reads the "output" directory).
##
## Galaxy collapses the rendered template onto a single shell command line, so
## every command after the initial mkdir has to be chained with "&&".
#import os
#import re
#set output_dir = 'output'
mkdir -p $output_dir
#if str($input_type_cond.input_type) == "single":
    ## We may have a single read or a pair, but in
    ## either case we want the same base file name.
    #set sample_name = $os.path.basename($input_type_cond.read.element_identifier)
    ## Drop the extension(s): keep only the text before the first ".".
    #if $sample_name.find(".") > 0:
        #set sample_name = $sample_name.split(".")[0]
    #end if
    ## Drop any suffix after the first "_" so the name is the same base name
    ## whichever read of a pair was supplied.
    #if $sample_name.find("_") > 0:
        #set sample_name = $sample_name.split("_")[0]
    #end if
    ## Quoted because the name comes from a user-controlled identifier.
    && echo '$sample_name' > '$output'
#else
    #for $i in $input_type_cond.reads_collection:
        #set sample_name = $os.path.basename($i.element_identifier)
        ## Only the extension is removed here; underscores are kept so each
        ## collection element keeps a distinct name.
        #if $sample_name.find(".") > 0:
            #set sample_name = $sample_name.split(".")[0]
        #end if
        #set output_file = $os.path.join($output_dir, $sample_name)
        && echo '$sample_name' > '$output_file'
    #end for
#end if
    ]]></command>
        <conditional name="input_type_cond">
            <!-- Switches the tool between one dataset and a list collection of datasets. -->
            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
                <option value="single" selected="true">Single files</option>
                <option value="collection">Collections of files</option>
            </param>
            <when value="single">
                <param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/>
            </when>
            <when value="collection">
                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/>
            </when>
        </conditional>
        <data name="output" format="txt">
            <!-- Produced only when a single file is analyzed. -->
            <filter>input_type_cond['input_type'] == 'single'</filter>
        </data>
        <collection name="output__collection" type="list">
            <!-- Each file found in the "output" directory becomes one txt element named after the file. -->
            <discover_datasets pattern="__name__" directory="output" format="txt" />
            <!-- Produced only when a collection is analyzed. -->
            <filter>input_type_cond['input_type'] == 'collection'</filter>
        </collection>
            <param name="input_type" value="collection"/>
            <!-- Three-element list collection used as the test input. -->
            <param name="reads_collection">
                <collection type="list">
                    <element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/>
                    <element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/>
                    <element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/>
                </collection>
            </param>
            <!-- Expected elements: each input identifier with its ".fastq" extension removed. -->
            <output_collection name="output__collection" type="list">
                <element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/>
                <element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/>
                <element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/>
            </output_collection>
**What it does**

Accepts one or more sample files and writes a sample name derived from each file name to the output file(s): the name is the text before the first "." (for single files, further shortened to the text before the first "_").  These
text files are then used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool.

**Required Options**

 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option.
        <citation type="bibtex">
            @misc{None,
            journal = {None},
            author = {1. Stuber T},
            title = {Manuscript in preparation},
            year = {None},
            url = {},}
        </citation>