barcode_splitter_multi: barcode_splitter_multi/barcode

annotate barcode_splitter_multi/barcode_splitter.xml @ 0:50df2d629d51 draft

Uploaded

author	hepcat72
date	Fri, 26 Aug 2016 16:30:56 -0400
parents
children	5e0fd61660b7

rev	line source
0 50df2d629d51 Uploaded hepcat72 parents: diff changeset	1 <tool id="cshl_princeton_fastx_barcode_splitter" version="0.4" name="Barcode Splitter">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	2 <description></description>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	3 <command interpreter="bash" detect_errors="aggressive"><![CDATA[
50df2d629d51 Uploaded hepcat72 parents: diff changeset	4 barcode_splitter_galaxy_wrapper.sh split
50df2d629d51 Uploaded hepcat72 parents: diff changeset	5 #for $sf in $seqfiles
50df2d629d51 Uploaded hepcat72 parents: diff changeset	6 ${sf.input.extension}
50df2d629d51 Uploaded hepcat72 parents: diff changeset	7 #break
50df2d629d51 Uploaded hepcat72 parents: diff changeset	8 #end for
50df2d629d51 Uploaded hepcat72 parents: diff changeset	9 --bcfile $bcfile --mismatches $mismatches --galaxy $zip $barcodes_at_end
50df2d629d51 Uploaded hepcat72 parents: diff changeset	10 #for $sf in $seqfiles
50df2d629d51 Uploaded hepcat72 parents: diff changeset	11 ${sf.input}
50df2d629d51 Uploaded hepcat72 parents: diff changeset	12 #end for
50df2d629d51 Uploaded hepcat72 parents: diff changeset	13 --idxread
50df2d629d51 Uploaded hepcat72 parents: diff changeset	14 #set $bound = $num_barcode_columns.value + 1
50df2d629d51 Uploaded hepcat72 parents: diff changeset	15 #for $n in range( 1, $bound )
50df2d629d51 Uploaded hepcat72 parents: diff changeset	16 ${n}
50df2d629d51 Uploaded hepcat72 parents: diff changeset	17 #end for
50df2d629d51 Uploaded hepcat72 parents: diff changeset	18 > $summary
50df2d629d51 Uploaded hepcat72 parents: diff changeset	19 ]]>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	20 </command>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	21
50df2d629d51 Uploaded hepcat72 parents: diff changeset	22 <inputs>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	23 <param format="txt" name="bcfile" type="data" label="Barcode File" help="Tab-delimited text file where the first column is a sample ID and subsequent columns are barcodes." />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	24 <param name="num_barcode_columns" type="integer" size="2" value="1" label="Number of barcode columns" help="The number of columns in the barcode file containing barcode sequences. Note that you must submit at least this many read files." />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	25
50df2d629d51 Uploaded hepcat72 parents: diff changeset	26
50df2d629d51 Uploaded hepcat72 parents: diff changeset	27 <repeat name="seqfiles" title="Read Files" min="1" default="2">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	28 <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to split" help="Barcoded reads files must be first. If there are multiple barcode columns in the barcode file, the files must be supplied in the same order as the barcode columns (from left to right)." />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	29 </repeat>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	30
50df2d629d51 Uploaded hepcat72 parents: diff changeset	31 <param name="mismatches" type="integer" size="3" value="0" label="Number of allowed mismatches" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	32
50df2d629d51 Uploaded hepcat72 parents: diff changeset	33 <param name="barcodes_at_end" type="boolean" truevalue="--barcodes_at_end" falsevalue="" checked="false"
50df2d629d51 Uploaded hepcat72 parents: diff changeset	34 label="Barcodes are at the end of all sequences" help="Default is the beginning of all sequences" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	35
50df2d629d51 Uploaded hepcat72 parents: diff changeset	36 <param name="zip" type="boolean" truevalue="--gzip" falsevalue="" checked="false"
50df2d629d51 Uploaded hepcat72 parents: diff changeset	37 label="Compress/zip the output" help="This generates reads files with a .gz extension. Default is based on the file extension of the first input file." />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	38
50df2d629d51 Uploaded hepcat72 parents: diff changeset	39 </inputs>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	40
50df2d629d51 Uploaded hepcat72 parents: diff changeset	41 <outputs>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	42 <data format="tabular" name="summary" label="${tool.name} on ${on_string}: Summary" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	43 <collection name="split_output" type="list" format_source="input" label="${tool.name} on ${on_string}">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	44 <discover_datasets pattern="__designation_and_ext__" directory="split" visible="false" label="${designation}"/>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	45 </collection>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	46 </outputs>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	47
50df2d629d51 Uploaded hepcat72 parents: diff changeset	48 <tests>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	49 <test>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	50 <!-- Split a FASTQ file; the collection name below must match the tool's "split_output" output collection -->
50df2d629d51 Uploaded hepcat72 parents: diff changeset	51 <param name="bcfile" value="barcode_splitter1.txt" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	52 <param name="num_barcode_columns" value="1" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	53 <repeat name="seqfiles">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	54 <param name="input" value="barcode_splitter1.fastq" ftype="fastqsolexa" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	55 </repeat>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	56 <param name="barcodes_at_end" value="" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	57 <param name="mismatches" value="2" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	58 <output name="summary" file="barcode_splitter1.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	59 <collection name="split_output" type="list">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	60 <discovered_dataset designation="BC1" ftype="fastqsolexa" file="barcode_splitter1_BC1.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	61 <discovered_dataset designation="BC2" ftype="fastqsolexa" file="barcode_splitter1_BC2.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	62 <discovered_dataset designation="BC3" ftype="fastqsolexa" file="barcode_splitter1_BC3.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	63 <discovered_dataset designation="BC4" ftype="fastqsolexa" file="barcode_splitter1_BC4.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	64 <discovered_dataset designation="unmatched" ftype="fastqsolexa" file="barcode_splitter1_unmatched.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	65 </collection>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	66 </test>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	67
50df2d629d51 Uploaded hepcat72 parents: diff changeset	68 <test>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	69 <!-- Split a FASTQ file, using separate index read; the output name below must match the tool's "summary" dataset -->
50df2d629d51 Uploaded hepcat72 parents: diff changeset	70 <param name="bcfile" value="barcode_splitter1.txt" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	71 <param name="num_barcode_columns" value="1" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	72 <repeat name="seqfiles">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	73 <param name="input" value="barcode_splitter_index.fastq" ftype="fastqsolexa" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	74 </repeat>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	75 <repeat name="seqfiles">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	76 <param name="input" value="barcode_splitter1.fastq" ftype="fastqsolexa" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	77 </repeat>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	78 <param name="barcodes_at_end" value="" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	79 <param name="mismatches" value="2" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	80 <output name="summary" file="barcode_splitter1.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	81 <collection name="split_output" type="list">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	82 <discovered_dataset designation="BC1" ftype="fastqsolexa" file="barcode_splitter1_BC1.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	83 <discovered_dataset designation="BC2" ftype="fastqsolexa" file="barcode_splitter1_BC2.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	84 <discovered_dataset designation="BC3" ftype="fastqsolexa" file="barcode_splitter1_BC3.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	85 <discovered_dataset designation="BC4" ftype="fastqsolexa" file="barcode_splitter1_BC4.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	86 <discovered_dataset designation="unmatched" ftype="fastqsolexa" file="barcode_splitter1_unmatched.out" />
50df2d629d51 Uploaded hepcat72 parents: diff changeset	87 </collection>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	88 </test>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	89 </tests>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	90
50df2d629d51 Uploaded hepcat72 parents: diff changeset	91 <help><![CDATA[
50df2d629d51 Uploaded hepcat72 parents: diff changeset	92 What it does
50df2d629d51 Uploaded hepcat72 parents: diff changeset	93
50df2d629d51 Uploaded hepcat72 parents: diff changeset	94 This tool splits a FASTQ file into several files, using barcodes as the split criteria. Barcodes in one file can be used to split multiple sorted files. Multiple sets of barcodes, each located in a different file, can be used.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	95
50df2d629d51 Uploaded hepcat72 parents: diff changeset	96 --------
50df2d629d51 Uploaded hepcat72 parents: diff changeset	97
50df2d629d51 Uploaded hepcat72 parents: diff changeset	98 Barcode file Format
50df2d629d51 Uploaded hepcat72 parents: diff changeset	99
50df2d629d51 Uploaded hepcat72 parents: diff changeset	100 Barcode files are simple text files.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	101 Each line should contain an identifier (a descriptive name for the barcode) and at least 1 barcode, separated by TAB characters. Multiple columns of barcodes are supported (each corresponding to a separate barcoded read file), though there's usually just 1. As an example of using multiple sets of barcodes, the first set could denote the user and the second set could denote each user's samples.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	102 Example::
50df2d629d51 Uploaded hepcat72 parents: diff changeset	103
50df2d629d51 Uploaded hepcat72 parents: diff changeset	104 #This line is a comment (starts with a 'number' sign)
50df2d629d51 Uploaded hepcat72 parents: diff changeset	105 BC1 GATCT TTGCAT
50df2d629d51 Uploaded hepcat72 parents: diff changeset	106 BC2 ATCGT GCGCAT
50df2d629d51 Uploaded hepcat72 parents: diff changeset	107 BC3 GTGAT AGGTCA
50df2d629d51 Uploaded hepcat72 parents: diff changeset	108 BC4 TGTCT CTTTGG
50df2d629d51 Uploaded hepcat72 parents: diff changeset	109
50df2d629d51 Uploaded hepcat72 parents: diff changeset	110 For each barcode, a new FASTQ file will be created (with the barcodes' identifier as part of the file name).
50df2d629d51 Uploaded hepcat72 parents: diff changeset	111 Sequences matching the barcodes in a row will be stored in the appropriate file.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	112
50df2d629d51 Uploaded hepcat72 parents: diff changeset	113 The first sequence file submitted must contain sequences with the barcodes in the first column of the barcode file. The second sequence file must contain sequences with the barcodes in the second column, and so on. The 'Number of barcode columns' specified must match the number of actual columns in the barcode file.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	114
50df2d629d51 Uploaded hepcat72 parents: diff changeset	115 One (possibly two) additional FASTQ files will be created: the 'unmatched' file (and the 'multimatched' file), where sequences not matching any barcode (or matching more than 1 barcode when mismatches are taken into account) will be stored.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	116
50df2d629d51 Uploaded hepcat72 parents: diff changeset	117 The output of this tool is a summary table displaying the split counts for each barcode identifier and the percentage of the total reads those represent.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	118 In addition, each fastq file produced will be loaded into the galaxy history as part of a collection list.
50df2d629d51 Uploaded hepcat72 parents: diff changeset	119 ]]>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	120 </help>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	121
50df2d629d51 Uploaded hepcat72 parents: diff changeset	122 <!-- Barcode-Splitter is part of the paired_sequence_utils package, by L.Parsons (lparsons@princeton.edu) and R.Leach (rleach@princeton.edu) -->
50df2d629d51 Uploaded hepcat72 parents: diff changeset	123 <citations>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	124 <citation type="bibtex">
50df2d629d51 Uploaded hepcat72 parents: diff changeset	125 @misc{paired_sequence_utils,
50df2d629d51 Uploaded hepcat72 parents: diff changeset	126 title = {{Barcode}-{Splitter}},
50df2d629d51 Uploaded hepcat72 parents: diff changeset	127 url = {https://bitbucket.org/hepcat72/paired_sequence_utils},
50df2d629d51 Uploaded hepcat72 parents: diff changeset	128 author = "Parsons, Lance and Leach, Robert"
50df2d629d51 Uploaded hepcat72 parents: diff changeset	129 }
50df2d629d51 Uploaded hepcat72 parents: diff changeset	130 </citation>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	131 </citations>
50df2d629d51 Uploaded hepcat72 parents: diff changeset	132
50df2d629d51 Uploaded hepcat72 parents: diff changeset	133 </tool>

Mercurial > repos > hepcat72 > barcode_splitter_multi

annotate barcode_splitter_multi/barcode_splitter.xml @ 0:50df2d629d51 draft