Mercurial > repos > brenninc > xdirectory_reader_limited_by_data_table
diff data_reader.xml @ 0:ccabef3f7d5f draft
Uploaded first version
author | brenninc |
---|---|
date | Sun, 08 May 2016 11:01:03 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_reader.xml Sun May 08 11:01:03 2016 -0400 @@ -0,0 +1,428 @@ +<tool id="directory_data_reader" name="Directory Data Finder" version="0.2"> + <description>Reads a particular data type from a directory on the server</description> + <command interpreter="python"> +<![CDATA[ + directory_copier.py + #if $results.required=="data" + --ending .${results.extension.file_type} + --link + #if $results.start + --start $results.start + #end if + #if $results.last + --last $results.last + #end if + #if $results.extension.file_type=="fa" + --new_ending .fasta + #end if + #if $results.extension.file_type=="fq" + --new_ending .fastq + #end if + #if $results.extension.file_type=="text" + --new_ending .txt + #end if + #if $results.extension.file_type=="tsv" + --new_ending .tabular + #end if + #if $results.extension.file_type in ["fasta.gz"] + --decompress + --new_ending .fasta + #end if + #if $results.extension.file_type=="fastq" + --new_ending .$results.extension.new_galaxy.new_ending + #end if + #if $results.extension.file_type=="fastq.gz" + --decompress + --new_ending .${results.extension.new_galaxy.new_ending} + #end if + #else + --ending bam + --ending csv + --ending fa + --ending fasta + --ending fasta.gz + --ending fastq + --ending fastq.gz + --ending fasta + --ending fq + --ending sam + --ending tabular + --ending text + --ending tsv + --ending txt + --ending xls + --ending xlsx + #end if + #if $directory.startswith('/'): + --path ${directory} + #else + --path $__tool_directory__/${directory} + #end if + --list ${listing} +]]> + </command> + <inputs> + <param name="directory" type="text" label="Directory to read data from." /> + <param name="list_name" type="text" size="25" label="output name" value="input data"/> + <conditional name="results"> + <param name="required" type="select" label="Download data or just directory listing" help="Select type of action required."> + <option value="data" selected="true">Data and listing of selected type</option> + <option value="listing">Get listing of selected file types </option> + </param> + <when value="data"> + <param name="start" type="text" value="" label="String which must be at the start of each file name" /> + <param name="last" type="text" value="" label="String which must be at the end of the file name (excluding the file type)" /> + <conditional name="extension"> + <param name="file_type" type="select" label="File Type" help="File Type."> + <option value="bam">*.bam files</option> + <option value="csv">*.csv files</option> + <option value="fa">*.fa files Files saved as *.fasta for galaxy</option> + <option value="fasta">*.fasta files</option> + <option value="fasta.gz">*.fasta.gz files</option> + <option value="fastq">*.fastq files</option> + <option value="fastq.gz">*.fastq.gz files</option> + <option value="fastq">*.fastq files</option> + <option value="fastq.gz">*.fastq.gz files</option> + <option value="fq">*.fq files Files saved as *.fastq for galaxy</option> + <option value="sam">*.sam files</option> + <option value="tabular">*.tabular Files</option> + <option value="text">*.text Files saved as *.txt for galaxy</option> + <option value="tsv">*.tsv files saved as *.tabular for galaxy</option> + <option value="txt">*.txt Files</option> + <option value="xls">*.xls files</option> + <option value="xlsx">*.xlsx files</option> + </param> + <when value="bam" /> + <when value="csv" /> + <when value="fa" /> + <when value="fasta" /> + <when value="fasta.gz" /> + <when value="fastq" > + <conditional name="new_galaxy"> + <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used."> + <option value="fastq" selected="true">Keep data as general fastq format</option> + <option value="fastqsanger">Tag data as fastq sanger in galaxy</option> + <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option> + <option value="fastqillumina">Tag data as fastq illumina in galaxy</option> + </param> + <when value="fastq" /> + <when value="fastqsanger" /> + <when value="fastqsolexa" /> + <when value="fastqillumina" /> + </conditional> + </when> + <when value="fastq.gz" > + <conditional name="new_galaxy"> + <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used."> + <option value="fastq" selected="true" >Keep data as general fastq format</option> + <option value="fastqsanger">Tag data as fastq sanger in galaxy</option> + <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option> + <option value="fastqillumina">Tag data as fastq illumina in galaxy</option> + </param> + <when value="fastq" /> + <when value="fastqsanger" /> + <when value="fastqsolexa" /> + <when value="fastqillumina" /> + </conditional> + </when> + <when value="fq" /> + <when value="sam" /> + <when value="tabular" /> + <when value="text" /> + <when value="tsv" /> + <when value="txt" /> + <when value="xls" /> + <when value="xlsx" /> + </conditional> + </when> + <when value="listing"> + </when> + </conditional> + </inputs> + <outputs> + <data format="txt" name="listing" label="List of files in $list_name"> + </data> + <!-- Ideally galaxy can get the type based on the file extensions. If so just add the type here --> + <collection type="list" label="$list_name" name="data_collection"> + <filter>(results['required'] == 'data')</filter> + <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true" /> + </collection> + </outputs> + <tests> + <test> + <param name="directory" value="test-data" /> + <param name="list_name" value="csv_files" /> + <param name="results|extension|file_type" value="csv"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.csv" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="csv" file="sample1.csv" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fa"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.fasta" /> + <has_line line="other.fasta" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fasta" file="sample1.fasta" /> + <element name="other" ftype="fasta" file="sample1.fasta" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fasta"/> + <param name="results|start" value="sam" /> + <output name="listing"> + <assert_contents> + <has_line line="sample1.fasta" /> + <not_has_text text="other.fasta" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fasta" file="sample1.fasta" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fasta.gz"/> + <param name="results|last" value="le1" /> + <output name="listing"> + <assert_contents> + <has_line line="sample1.fasta" /> + <not_has_text text="other.fasta" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fasta" file="sample1.fasta" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fq"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.fastq" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fastq" file="sample1.fastq" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fastq"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.fastq" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fastq" file="sample1.fastq" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fastq"/> + <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.fastqsanger" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fastqsanger" file="sample1.fastq" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fastq.gz"/> + <output name="data_collection"> + <assert_contents> + <has_line line="sample1.fastq" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fastq" file="sample1.fastq" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="fastq.gz"/> + <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/> + <output name="listing_fastq_gz"> + <assert_contents> + <has_line line="sample1.fastqsanger" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="fastqsanger" file="sample1.fastq" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="sam"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.sam" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="sam" file="sample1.sam" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="tabular"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.tabular" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="tabular" file="sample1.tabular" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="text"/> + <output name="listing_text"> + <assert_contents> + <has_line line="sample1.txt" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="txt" file="sample1.text" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="tsv"/> + <output name="data_collection"> + <assert_contents> + <has_line line="sample1.tabular" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="tabular" file="sample1.tsv" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="txt" /> + <output name="listing"> + <assert_contents> + <has_line line="sample1.txt" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="txt" file="sample1.txt" /> + <element name="sample2" ftype="txt" file="sample2.txt" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="xls"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.xls" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="xls" file="sample1.xls" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|extension|file_type" value="xlsx"/> + <output name="listing"> + <assert_contents> + <has_line line="sample1.xlsx" /> + </assert_contents> + </output> + <output_collection name="data_collection" type="list"> + <element name="sample1" ftype="xlsx" file="sample1.xlsx" /> + </output_collection> + </test> + <test> + <param name="directory" value="test-data" /> + <param name="results|required" value="listing"/> + <output name="listing_all"> + <assert_contents> + <has_line line="sample1.csv" /> + <has_line line="sample1.fasta" /> + <has_line line="sample1.fasta.gz" /> + <has_line line="sample1.fastq" /> + <has_line line="sample1.fastq.gz" /> + <has_line line="sample1.sam" /> + <has_line line="sample1.tabular" /> + <has_line line="sample1.text" /> + <has_line line="sample1.tsv" /> + <has_line line="sample1.txt" /> + <has_line line="sample1.xls" /> + <has_line line="sample1.xlsx" /> + </assert_contents> + </output> + </test> + + </tests> + + <help> +<![CDATA[ +This tool will lookup files on the Galaxy server machine, including mounted directories. + +Only directories that are included in the white list and not in the black list are allowed. +If the directory you require does not pass the white list or blacklist test please contact the local galaxy admin. +(Admins see README) + +This tool only supports a limited set of types and file extenstions. +No other files are ever returned either by data or listing. +These endings are case senitive. + +==== + +The data options will look for all files that have a particular ending in the selected directory. + +The tool will return two things. + +1. A Dataset collection of all the detected files. + +2. A file with the names of all the detected files. These will be sorted in the same order as galaxy builds the dataset collection. + +The files can be filtered by setting a specific start strinf for the file name. +Only files that start with this string (case senstive) will be included. + +Files can also be filter for the last part before the file extsentions. + +Assuming the directory has: +C01_R1_001.fasta C01_R2_001.fatsa C02_R1_001.fasta C02_R2_001.fatsa + +Setting start C01 will return just the C01 files: C01_R1_001.fasta C01_R2_001.fatsa + +Setting last R1_001 will return the read1 files: C01_R1_001.fasta C02_R1_001.fasta + +As Galaxy detects the file type based on the extension this tool will change the exstension for supported alternative file ends. + This includes (manually) setting the exstension to fastqsanger, fastqsolexa, fastqillumina for tools that specify one of these. + +This tool will unzip gz files. + +==== + +The listing option will return a txt file with all the files found with any of the supported endings. Other files in that directory are not included. + +The file exstensions are left as in the directory. + +File start and last filters are not supported in this mode. +]]> + </help> + <citations> + </citations> + +</tool>