view data_reader.xml @ 1:b2c1bf6f0f86 draft

Deleted selected files
author brenninc
date Mon, 09 May 2016 02:04:19 -0400
parents ccabef3f7d5f
children
line wrap: on
line source

<tool id="directory_data_reader" name="Directory Data Finder" version="0.2">
    <description>Reads a particular data type from a directory on the server</description>
    <!--
        Command line for directory_copier.py, written as a Cheetah template.
        In "data" mode the single selected ending is passed together with the optional
        file name filters and the rename / decompress flags for that ending.
        In "listing" mode every supported ending is passed and nothing else.
        Free-text values and paths are single-quoted so each reaches the script as one argument.
    -->
    <command interpreter="python">
<![CDATA[
        directory_copier.py
        #if $results.required == "data"
            ## Data mode: request only files with the selected ending.
            --ending .${results.extension.file_type}
            --link
            ## Optional file name filters; an empty text box adds no flag.
            #if $results.start
                --start '${results.start}'
            #end if
            #if $results.last
                --last '${results.last}'
            #end if
            ## Per-ending flags: alternative endings get a new ending, gz endings also get --decompress.
            #if $results.extension.file_type == "fa"
                --new_ending .fasta
            #elif $results.extension.file_type == "fq"
                --new_ending .fastq
            #elif $results.extension.file_type == "text"
                --new_ending .txt
            #elif $results.extension.file_type == "tsv"
                --new_ending .tabular
            #elif $results.extension.file_type == "fasta.gz"
                --decompress
                --new_ending .fasta
            #elif $results.extension.file_type == "fastq"
                ## The fastq flavour is chosen by the user in the nested new_galaxy conditional.
                --new_ending .${results.extension.new_galaxy.new_ending}
            #elif $results.extension.file_type == "fastq.gz"
                --decompress
                --new_ending .${results.extension.new_galaxy.new_ending}
            #end if
        #else
            ## Listing mode: pass every supported ending once.
            --ending bam
            --ending csv
            --ending fa
            --ending fasta
            --ending fasta.gz
            --ending fastq
            --ending fastq.gz
            --ending fq
            --ending sam
            --ending tabular
            --ending text
            --ending tsv
            --ending txt
            --ending xls
            --ending xlsx
        #end if
        ## Absolute directories are used as given; anything else is resolved against the tool directory.
        #if str($directory).startswith('/')
            --path '${directory}'
        #else
            --path '$__tool_directory__/${directory}'
        #end if
        --list '${listing}'
]]>
    </command>
    <inputs>
        <!-- Directory on the server to scan; the command treats a value starting with "/" as absolute
             and resolves any other value against the tool directory. -->
        <param name="directory" type="text" label="Directory to read data from." />
        <!-- Used only in the labels of the outputs. -->
        <param name="list_name" type="text" size="25" label="output name" value="input data"/>
        <!-- Top-level mode switch: "data" produces the listing and a dataset collection,
             "listing" produces the listing only. -->
        <conditional name="results">
            <param name="required" type="select" label="Download data or just directory listing" help="Select type of action required.">
                <option value="data" selected="true">Data and listing of selected type</option>
                <option value="listing">Get listing of selected file types </option>
            </param>
            <when value="data">
                <!-- Optional file name filters; leaving a box empty adds no filter to the command. -->
                <param name="start" type="text" value="" label="String which must be at the start of each file name" />
                <param name="last" type="text" value="" label="String which must be at the end of the file name (excluding the file type)" />
                <!-- File ending to collect. Each option value is listed exactly once and has a matching when below. -->
                <conditional name="extension">
                    <param name="file_type" type="select" label="File Type" help="File Type.">
                        <option value="bam">*.bam files</option>
                        <option value="csv">*.csv files</option>
                        <option value="fa">*.fa files Files saved as *.fasta for galaxy</option>
                        <option value="fasta">*.fasta files</option>
                        <option value="fasta.gz">*.fasta.gz files</option>
                        <option value="fastq">*.fastq files</option>
                        <option value="fastq.gz">*.fastq.gz files</option>
                        <option value="fq">*.fq files Files saved as *.fastq for galaxy</option>
                        <option value="sam">*.sam files</option>
                        <option value="tabular">*.tabular Files</option>
                        <option value="text">*.text Files saved as *.txt for galaxy</option>
                        <option value="tsv">*.tsv files saved as *.tabular for galaxy</option>
                        <option value="txt">*.txt Files</option>
                        <option value="xls">*.xls files</option>
                        <option value="xlsx">*.xlsx files</option>
                    </param>
                    <when value="bam" />
                    <when value="csv" />
                    <when value="fa" />
                    <when value="fasta" />
                    <when value="fasta.gz" />
                    <!-- Only the two fastq endings ask which fastq flavour the files should be tagged as. -->
                    <when value="fastq">
                        <conditional name="new_galaxy">
                            <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used.">
                                <option value="fastq" selected="true">Keep data as general fastq format</option>
                                <option value="fastqsanger">Tag data as fastq sanger in galaxy</option>
                                <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option>
                                <option value="fastqillumina">Tag data as fastq illumina in galaxy</option>
                            </param>
                            <when value="fastq" />
                            <when value="fastqsanger" />
                            <when value="fastqsolexa" />
                            <when value="fastqillumina" />
                        </conditional>
                    </when>
                    <when value="fastq.gz">
                        <conditional name="new_galaxy">
                            <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used.">
                                <option value="fastq" selected="true">Keep data as general fastq format</option>
                                <option value="fastqsanger">Tag data as fastq sanger in galaxy</option>
                                <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option>
                                <option value="fastqillumina">Tag data as fastq illumina in galaxy</option>
                            </param>
                            <when value="fastq" />
                            <when value="fastqsanger" />
                            <when value="fastqsolexa" />
                            <when value="fastqillumina" />
                        </conditional>
                    </when>
                    <when value="fq" />
                    <when value="sam" />
                    <when value="tabular" />
                    <when value="text" />
                    <when value="tsv" />
                    <when value="txt" />
                    <when value="xls" />
                    <when value="xlsx" />
                </conditional>
            </when>
            <!-- Listing mode takes no further parameters. -->
            <when value="listing" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Text file written by the command through its list argument; produced in both modes. -->
        <data name="listing" format="txt" label="List of files in $list_name" />
        <!-- No format is declared on the collection: the discovery pattern is expected to take each
             element's type from the file extension. If Galaxy cannot do that, add the type here. -->
        <collection name="data_collection" type="list" label="$list_name">
            <!-- Only created when the user asked for data rather than a listing. -->
            <filter>(results['required'] == 'data')</filter>
            <discover_datasets directory="output" pattern="__designation_and_ext__" visible="true" />
        </collection>
    </outputs>
    <tests>
        <!--
            All tests read from the relative directory "test-data", which the command resolves
            against the tool directory. Data-mode tests check the "listing" output and the
            discovered "data_collection"; the final test covers listing mode.
            Every output assertion targets "listing", the only data output the tool declares.
        -->
        <test>
            <param name="directory" value="test-data" />
            <param name="list_name" value="csv_files" />
            <param name="results|extension|file_type" value="csv"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.csv" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="csv" file="sample1.csv" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fa"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fasta" />
                    <has_line line="other.fasta" />
                </assert_contents>
            </output>
            <!-- NOTE(review): element "other" is compared against sample1.fasta; confirm whether
                 other.fasta was intended. -->
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fasta" file="sample1.fasta" />
                <element name="other" ftype="fasta" file="sample1.fasta" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fasta"/>
            <param name="results|start" value="sam" />
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fasta" />
                    <not_has_text text="other.fasta" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fasta" file="sample1.fasta" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fasta.gz"/>
            <param name="results|last" value="le1" />
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fasta" />
                    <not_has_text text="other.fasta" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fasta" file="sample1.fasta" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fq"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fastq" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fastq" file="sample1.fastq" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fastq"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fastq" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fastq" file="sample1.fastq" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fastq"/>
            <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fastqsanger" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fastqsanger" file="sample1.fastq" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fastq.gz"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fastq" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fastq" file="sample1.fastq" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="fastq.gz"/>
            <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.fastqsanger" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="fastqsanger" file="sample1.fastq" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="sam"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.sam" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="sam" file="sample1.sam" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="tabular"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.tabular" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="tabular" file="sample1.tabular" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="text"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.txt" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="txt" file="sample1.text" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="tsv"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.tabular" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="tabular" file="sample1.tsv" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="txt" />
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.txt" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="txt" file="sample1.txt" />
                <element name="sample2" ftype="txt" file="sample2.txt" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="xls"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.xls" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="xls" file="sample1.xls" />
            </output_collection>
        </test>
        <test>
            <param name="directory" value="test-data" />
            <param name="results|extension|file_type" value="xlsx"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.xlsx" />
                </assert_contents>
            </output>
            <output_collection name="data_collection" type="list">
                <element name="sample1" ftype="xlsx" file="sample1.xlsx" />
            </output_collection>
        </test>
        <!-- Listing mode: file names keep their original endings and no collection is produced. -->
        <test>
            <param name="directory" value="test-data" />
            <param name="results|required" value="listing"/>
            <output name="listing">
                <assert_contents>
                    <has_line line="sample1.csv" />
                    <has_line line="sample1.fasta" />
                    <has_line line="sample1.fasta.gz" />
                    <has_line line="sample1.fastq" />
                    <has_line line="sample1.fastq.gz" />
                    <has_line line="sample1.sam" />
                    <has_line line="sample1.tabular" />
                    <has_line line="sample1.text" />
                    <has_line line="sample1.tsv" />
                    <has_line line="sample1.txt" />
                    <has_line line="sample1.xls" />
                    <has_line line="sample1.xlsx" />
                </assert_contents>
            </output>
        </test>

    </tests>

    <help>
<![CDATA[
This tool will lookup files on the Galaxy server machine, including mounted directories.

Only directories that are included in the white list and not in the black list are allowed.
If the directory you require does not pass the white list or blacklist test please contact the local galaxy admin.
(Admins see README)

This tool only supports a limited set of types and file extensions.
No other files are ever returned either by data or listing.
These endings are case sensitive.

====

The data options will look for all files that have a particular ending in the selected directory.

The tool will return two things.

1. A Dataset collection of all the detected files.

2. A file with the names of all the detected files. These will be sorted in the same order as galaxy builds the dataset collection. 

The files can be filtered by setting a specific start string for the file name.
Only files that start with this string (case sensitive) will be included.

Files can also be filtered on the last part of the file name before the file extension.

Assuming the directory has:
C01_R1_001.fasta   C01_R2_001.fasta   C02_R1_001.fasta   C02_R2_001.fasta

Setting start C01 will return just the C01 files:   C01_R1_001.fasta   C01_R2_001.fasta

Setting last R1_001 will return the read1 files:   C01_R1_001.fasta   C02_R1_001.fasta

As Galaxy detects the file type based on the extension, this tool will change the extension for supported alternative file endings.
    This includes (manually) setting the extension to fastqsanger, fastqsolexa, fastqillumina for tools that specify one of these.

This tool will unzip gz files.

====

The listing option will return a txt file with all the files found with any of the supported endings. Other files in that directory are not included.

The file extensions are left as in the directory.

File start and last filters are not supported in this mode.
]]>
    </help>
    <!-- No citations are registered for this tool. -->
    <citations>
    </citations>

</tool>