diff data_reader.xml @ 0:ccabef3f7d5f draft

Uploaded first version
author brenninc
date Sun, 08 May 2016 11:01:03 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_reader.xml	Sun May 08 11:01:03 2016 -0400
@@ -0,0 +1,428 @@
+<tool id="directory_data_reader" name="Directory Data Finder" version="0.2">
+    <description>Reads a particular data type from a directory on the server</description>
+    <!-- Assembles the directory_copier.py command line.
+         "data" mode passes one ending plus the link, filter, decompress and rename flags;
+         "listing" mode passes every supported ending. Both modes pass the path and list arguments. -->
+    <command interpreter="python">
+<![CDATA[
+        directory_copier.py
+        #if $results.required=="data"
+            --ending .${results.extension.file_type}
+            --link
+            ## User-supplied text is single-quoted so it always reaches the script as one argument.
+            #if $results.start
+                --start '$results.start'
+            #end if
+            #if $results.last
+                --last '$results.last'
+            #end if
+            ## Choose the ending handed to Galaxy; file_type has one value, so at most one branch applies.
+            #if $results.extension.file_type=="fa"
+                --new_ending .fasta
+            #elif $results.extension.file_type=="fq"
+                --new_ending .fastq
+            #elif $results.extension.file_type=="text"
+                --new_ending .txt
+            #elif $results.extension.file_type=="tsv"
+                --new_ending .tabular
+            #elif $results.extension.file_type=="fasta.gz"
+                --decompress
+                --new_ending .fasta
+            #elif $results.extension.file_type=="fastq"
+                --new_ending .${results.extension.new_galaxy.new_ending}
+            #elif $results.extension.file_type=="fastq.gz"
+                --decompress
+                --new_ending .${results.extension.new_galaxy.new_ending}
+            #end if
+        #else
+            ## Listing mode: every supported ending, each passed exactly once.
+            --ending bam
+            --ending csv
+            --ending fa
+            --ending fasta
+            --ending fasta.gz
+            --ending fastq
+            --ending fastq.gz
+            --ending fq
+            --ending sam
+            --ending tabular
+            --ending text
+            --ending tsv
+            --ending txt
+            --ending xls
+            --ending xlsx
+        #end if
+        ## A directory that does not start with '/' is prefixed with the tool's own directory.
+        #if $directory.startswith('/'):
+            --path '${directory}'
+        #else
+            --path '$__tool_directory__/${directory}'
+        #end if
+        --list '${listing}'
+]]>
+    </command>
+    <inputs>
+        <param name="directory" type="text" label="Directory to read data from." />
+        <param name="list_name" type="text" size="25" label="output name" value="input data"/>
+        <!-- "data" exposes the file-name filters and the file type; "listing" takes no further parameters. -->
+        <conditional name="results">
+            <param name="required" type="select" label="Download data or just directory listing" help="Select type of action required.">
+                <option value="data" selected="true">Data and listing of selected type</option>
+                <option value="listing">Get listing of selected file types </option>
+            </param>
+            <when value="data">
+                <param name="start" type="text" value="" label="String which must be at the start of each file name" />
+                <param name="last" type="text" value="" label="String which must be at the end of the file name (excluding the file type)" />
+                <!-- One option per supported ending, each listed once and paired with a <when> below. -->
+                <conditional name="extension">
+                    <param name="file_type" type="select" label="File Type" help="File Type.">
+                        <option value="bam">*.bam files</option>
+                        <option value="csv">*.csv files</option>
+                        <option value="fa">*.fa files Files saved as *.fasta for galaxy</option>
+                        <option value="fasta">*.fasta files</option>
+                        <option value="fasta.gz">*.fasta.gz files</option>
+                        <option value="fastq">*.fastq files</option>
+                        <option value="fastq.gz">*.fastq.gz files</option>
+                        <option value="fq">*.fq files Files saved as *.fastq for galaxy</option>
+                        <option value="sam">*.sam files</option>
+                        <option value="tabular">*.tabular Files</option>
+                        <option value="text">*.text Files saved as *.txt for galaxy</option>
+                        <option value="tsv">*.tsv files saved as *.tabular for galaxy</option>
+                        <option value="txt">*.txt Files</option>
+                        <option value="xls">*.xls files</option>
+                        <option value="xlsx">*.xlsx files</option>
+                    </param>
+                    <when value="bam" />
+                    <when value="csv" />
+                    <when value="fa" />
+                    <when value="fasta" />
+                    <when value="fasta.gz" />
+                    <!-- Only the fastq variants carry an extra parameter: new_ending, used by the command as the new file ending. -->
+                    <when value="fastq">
+                        <conditional name="new_galaxy">
+                            <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used.">
+                                <option value="fastq" selected="true">Keep data as general fastq format</option>
+                                <option value="fastqsanger">Tag data as fastq sanger in galaxy</option>
+                                <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option>
+                                <option value="fastqillumina">Tag data as fastq illumina in galaxy</option>
+                            </param>
+                            <when value="fastq" />
+                            <when value="fastqsanger" />
+                            <when value="fastqsolexa" />
+                            <when value="fastqillumina" />
+                        </conditional>
+                    </when>
+                    <when value="fastq.gz">
+                        <conditional name="new_galaxy">
+                            <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used.">
+                                <option value="fastq" selected="true">Keep data as general fastq format</option>
+                                <option value="fastqsanger">Tag data as fastq sanger in galaxy</option>
+                                <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option>
+                                <option value="fastqillumina">Tag data as fastq illumina in galaxy</option>
+                            </param>
+                            <when value="fastq" />
+                            <when value="fastqsanger" />
+                            <when value="fastqsolexa" />
+                            <when value="fastqillumina" />
+                        </conditional>
+                    </when>
+                    <when value="fq" />
+                    <when value="sam" />
+                    <when value="tabular" />
+                    <when value="text" />
+                    <when value="tsv" />
+                    <when value="txt" />
+                    <when value="xls" />
+                    <when value="xlsx" />
+                </conditional>
+            </when>
+            <when value="listing" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Text file naming the detected files; it carries no filter, so it is produced in both modes. -->
+        <data name="listing" format="txt" label="List of files in $list_name" />
+        <!-- Files discovered in "output"; each file name supplies the element name and extension ("data" mode only). -->
+        <collection name="data_collection" type="list" label="$list_name">
+            <filter>(results['required'] == 'data')</filter>
+            <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true" />
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Content assertions target "listing", the only data output; data_collection is checked via output_collection. -->
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="list_name" value="csv_files" />
+            <param name="results|extension|file_type" value="csv"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.csv" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="csv" file="sample1.csv" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fa"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fasta" />
+                    <has_line line="other.fasta" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fasta" file="sample1.fasta" />
+                <element name="other" ftype="fasta" file="sample1.fasta" /> <!-- NOTE(review): compared against sample1.fasta, not other.fasta; confirm this is intended -->
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fasta"/>
+            <param name="results|start" value="sam" />
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fasta" />
+                    <not_has_text text="other.fasta" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fasta" file="sample1.fasta" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fasta.gz"/>
+            <param name="results|last" value="le1" />
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fasta" />
+                    <not_has_text text="other.fasta" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fasta" file="sample1.fasta" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fq"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fastq" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fastq" file="sample1.fastq" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fastq"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fastq" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fastq" file="sample1.fastq" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fastq"/>
+            <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fastqsanger" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fastqsanger" file="sample1.fastq" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fastq.gz"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fastq" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fastq" file="sample1.fastq" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="fastq.gz"/>
+            <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.fastqsanger" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="fastqsanger" file="sample1.fastq" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="sam"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.sam" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="sam" file="sample1.sam" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="tabular"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.tabular" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="tabular" file="sample1.tabular" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="text"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.txt" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="txt" file="sample1.text" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="tsv"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.tabular" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="tabular" file="sample1.tsv" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="txt" />
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.txt" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="txt" file="sample1.txt" />
+                <element name="sample2" ftype="txt" file="sample2.txt" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="xls"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.xls" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="xls" file="sample1.xls" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|extension|file_type" value="xlsx"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.xlsx" />
+                </assert_contents>
+            </output>
+            <output_collection name="data_collection" type="list">
+                <element name="sample1" ftype="xlsx" file="sample1.xlsx" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="directory" value="test-data" />
+            <param name="results|required" value="listing"/>
+            <output name="listing">
+                <assert_contents>
+                    <has_line line="sample1.csv" />
+                    <has_line line="sample1.fasta" />
+                    <has_line line="sample1.fasta.gz" />
+                    <has_line line="sample1.fastq" />
+                    <has_line line="sample1.fastq.gz" />
+                    <has_line line="sample1.sam" />
+                    <has_line line="sample1.tabular" />
+                    <has_line line="sample1.text" />
+                    <has_line line="sample1.tsv" />
+                    <has_line line="sample1.txt" />
+                    <has_line line="sample1.xls" />
+                    <has_line line="sample1.xlsx" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+This tool will lookup files on the Galaxy server machine, including mounted directories.
+
+Only directories that are included in the white list and not in the black list are allowed.
+If the directory you require does not pass the white list or blacklist test please contact the local galaxy admin.
+(Admins see README)
+
+This tool only supports a limited set of types and file extensions.
+No other files are ever returned either by data or listing.
+These endings are case sensitive.
+
+====
+
+The data options will look for all files that have a particular ending in the selected directory.
+
+The tool will return two things.
+
+1. A Dataset collection of all the detected files.
+
+2. A file with the names of all the detected files. These will be sorted in the same order as galaxy builds the dataset collection. 
+
+The files can be filtered by setting a specific start string for the file name.
+Only files that start with this string (case sensitive) will be included.
+
+Files can also be filtered by the last part of the name before the file extension.
+
+Assuming the directory has:
+C01_R1_001.fasta   C01_R2_001.fasta   C02_R1_001.fasta   C02_R2_001.fasta
+
+Setting start C01 will return just the C01 files:   C01_R1_001.fasta   C01_R2_001.fasta
+
+Setting last R1_001 will return the read1 files:   C01_R1_001.fasta   C02_R1_001.fasta
+
+As Galaxy detects the file type based on the extension this tool will change the extension for supported alternative file endings.
+    This includes (manually) setting the extension to fastqsanger, fastqsolexa, fastqillumina for tools that specify one of these.
+
+This tool will unzip gz files.
+
+====
+
+The listing option will return a txt file with all the files found with any of the supported endings. Other files in that directory are not included.
+
+The file extensions are left as in the directory.
+
+File start and last filters are not supported in this mode.
+]]>
+    </help>
+    <citations>
+    </citations>
+
+</tool>