annotate barcode_splitter_multi/barcode_splitter.xml @ 0:50df2d629d51 draft

Uploaded
author hepcat72
date Fri, 26 Aug 2016 16:30:56 -0400
parents
children 5e0fd61660b7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
1 <tool id="cshl_princeton_fastx_barcode_splitter" version="0.4" name="Barcode Splitter">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
2 <description></description>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
3 <command interpreter="bash" detect_errors="aggressive"><![CDATA[
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
4 barcode_splitter_galaxy_wrapper.sh split
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
5 #for $sf in $seqfiles
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
6 ${sf.input.extension}
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
7 #break
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
8 #end for
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
9 --bcfile $bcfile --mismatches $mismatches --galaxy $zip $barcodes_at_end
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
10 #for $sf in $seqfiles
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
11 ${sf.input}
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
12 #end for
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
13 --idxread
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
14 #set $bound = $num_barcode_columns.value + 1
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
15 #for $n in range( 1, $bound )
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
16 ${n}
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
17 #end for
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
18 > $summary
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
19 ]]>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
20 </command>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
21
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
22 <inputs>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
23 <param format="txt" name="bcfile" type="data" label="Barcode File" help="Tab-delimited text file where the first column is a sample ID and subsequent columns are barcodes." />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
24 <param name="num_barcode_columns" type="integer" size="2" value="1" label="Number of barcode columns" help="The number of columns in the barcode file containing barcode sequences. Note that you must submit at least this many read files." />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
25
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
26
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
27 <repeat name="seqfiles" title="Read Files" min="1" default="2">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
28 <param format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" name="input" type="data" label="Library to split" help="Barcoded reads files must be first. If there are multiple barcode columns in the barcode file, the files must be supplied in the same order as the barcode columns (from left to right)." />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
29 </repeat>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
30
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
31 <param name="mismatches" type="integer" size="3" value="0" label="Number of allowed mismatches" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
32
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
33 <param name="barcodes_at_end" type="boolean" truevalue="--barcodes_at_end" falsevalue="" checked="false"
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
34 label="Barcodes are at the end of all sequences" help="Default is the beginning of all sequences" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
35
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
36 <param name="zip" type="boolean" truevalue="--gzip" falsevalue="" checked="false"
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
37 label="Compress/zip the output" help="This generates reads files with a .gz extension. Default is based on the file extension of the first input file." />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
38
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
39 </inputs>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
40
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
41 <outputs>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
42 <data format="tabular" name="summary" label="${tool.name} on ${on_string}: Summary" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
43 <collection name="split_output" type="list" format_source="input" label="${tool.name} on ${on_string}">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
44 <discover_datasets pattern="__designation_and_ext__" directory="split" visible="false" label="${designation}"/>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
45 </collection>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
46 </outputs>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
47
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
48 <tests>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
49 <test>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
50 <!-- Split a FASTQ file -->
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
51 <param name="bcfile" value="barcode_splitter1.txt" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
52 <param name="num_barcode_columns" value="1" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
53 <repeat name="seqfiles">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
54 <param name="input" value="barcode_splitter1.fastq" ftype="fastqsolexa" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
55 </repeat>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
56 <param name="barcodes_at_end" value="" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
57 <param name="mismatches" value="2" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
58 <output name="summary" file="barcode_splitter1.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
59 <collection name="split_output" type="list"> <!-- name must match the collection declared in the outputs section -->
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
60 <discovered_dataset designation="BC1" ftype="fastqsolexa" file="barcode_splitter1_BC1.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
61 <discovered_dataset designation="BC2" ftype="fastqsolexa" file="barcode_splitter1_BC2.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
62 <discovered_dataset designation="BC3" ftype="fastqsolexa" file="barcode_splitter1_BC3.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
63 <discovered_dataset designation="BC4" ftype="fastqsolexa" file="barcode_splitter1_BC4.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
64 <discovered_dataset designation="unmatched" ftype="fastqsolexa" file="barcode_splitter1_unmatched.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
65 </collection>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
66 </test>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
67
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
68 <test>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
69 <!-- Split a FASTQ file, using separate index read -->
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
70 <param name="bcfile" value="barcode_splitter1.txt" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
71 <param name="num_barcode_columns" value="1" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
72 <repeat name="seqfiles">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
73 <param name="input" value="barcode_splitter_index.fastq" ftype="fastqsolexa" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
74 </repeat>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
75 <repeat name="seqfiles">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
76 <param name="input" value="barcode_splitter1.fastq" ftype="fastqsolexa" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
77 </repeat>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
78 <param name="barcodes_at_end" value="" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
79 <param name="mismatches" value="2" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
80 <output name="summary" file="barcode_splitter1.out" /> <!-- "summary" is the only data output the tool declares -->
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
81 <collection name="split_output" type="list">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
82 <discovered_dataset designation="BC1" ftype="fastqsolexa" file="barcode_splitter1_BC1.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
83 <discovered_dataset designation="BC2" ftype="fastqsolexa" file="barcode_splitter1_BC2.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
84 <discovered_dataset designation="BC3" ftype="fastqsolexa" file="barcode_splitter1_BC3.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
85 <discovered_dataset designation="BC4" ftype="fastqsolexa" file="barcode_splitter1_BC4.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
86 <discovered_dataset designation="unmatched" ftype="fastqsolexa" file="barcode_splitter1_unmatched.out" />
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
87 </collection>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
88 </test>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
89 </tests>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
90
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
91 <help><![CDATA[
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
92 **What it does**
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
93
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
94 This tool splits a FASTQ file into several files, using barcodes as the split criteria. Barcodes in one file can be used to split multiple sorted files. Multiple sets of barcodes, each located in a different file, can be used.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
95
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
96 --------
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
97
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
98 **Barcode file Format**
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
99
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
100 Barcode files are simple text files.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
101 Each line should contain an identifier (descriptive name for the barcode), and at least 1 barcode, separated by TAB characters. Multiple columns of barcodes are supported (each corresponding to a separate barcoded read file), though there's usually just 1. As an example of using multiple sets of barcodes, the first set could identify the user and the second set could identify each user's samples.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
102 Example::
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
103
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
104 #This line is a comment (starts with a 'number' sign)
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
105 BC1 GATCT TTGCAT
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
106 BC2 ATCGT GCGCAT
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
107 BC3 GTGAT AGGTCA
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
108 BC4 TGTCT CTTTGG
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
109
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
110 For each barcode, a new FASTQ file will be created (with the barcodes' identifier as part of the file name).
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
111 Sequences matching the barcodes in a row will be stored in the appropriate file.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
112
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
113 The first sequence file submitted must contain sequences with the barcodes in the first column of the barcode file. The second sequence file must contain sequences with the barcodes in the second column, and so on. The 'Number of barcode columns' specified must match the number of actual columns in the barcode file.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
114
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
115 One (possibly two) additional FASTQ files will be created: the 'unmatched' file (and the 'multimatched' file), where sequences not matching any barcode (or matching more than 1 barcode when mismatches are taken into account) will be stored.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
116
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
117 The output of this tool is a summary table displaying the split counts for each barcode identifier and the percentage of the total reads those represent.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
118 In addition, each FASTQ file produced will be loaded into the Galaxy history as part of a collection list.
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
119 ]]>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
120 </help>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
121
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
122 <!-- Barcode-Splitter is part of the paired_sequence_utils package, by L.Parsons (lparsons@princeton.edu) and R.Leach (rleach@princeton.edu) -->
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
123 <citations>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
124 <citation type="bibtex">
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
125 @misc{paired_sequence_utils,
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
126 title = {{Barcode}-{Splitter}},
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
127 url = {https://bitbucket.org/hepcat72/paired_sequence_utils},
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
128 author = "Parsons, Lance and Leach, Robert"
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
129 }
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
130 </citation>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
131 </citations>
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
132
50df2d629d51 Uploaded
hepcat72
parents:
diff changeset
133 </tool>