view jemultiplexer.xml @ 4:861cbe4eca25 draft default tip

Correct path to java file
author gbcs-embl-heidelberg
date Mon, 26 Jan 2015 08:09:03 -0500
parents 321b695b1a33
children
line wrap: on
line source

<tool id="jedebarcoding" name="Jemultiplexer">
	<!-- Short description of what the tool does. -->
	<description>Demultiplexes multiplexed data</description>
	<!-- Declared dependency: a package named "java" with the version string ">=1.6". -->
	<requirements>
		<requirement version="&gt;=1.6" type="package">java</requirement>
	</requirements>
	<!--
	  Command line for the jemultiplexer.py wrapper script.
	  All arguments are positional, so every value is double-quoted: an emptied
	  text field or a path containing whitespace would otherwise drop or split
	  an argument and shift every argument that follows it.
	  The last five arguments are always, in this order: barcode read position,
	  barcode used for sample matching, redundant-barcode flag, strict flag and
	  the second FASTQ input. The nesting level of the "singlePaired"
	  conditional that supplies them depends on the selections made; in
	  single-end mode they come from hidden placeholder parameters.
	-->
	<command interpreter="python">
	jemultiplexer.py
	"$MpxData1"
	"$output1"
	"$output1.id"
	"$bsinputtype.barcodes"
	"$bsinputtype.barcode_list"
	"$__new_file_path__"
	"$MpxData1.ext"
	"$bcodelen"
	"$qualityFormat"
	"$maxMismatches"
	"$minBaseQuality"
	"$minMismatchingDelta"
	"$clipBarcodeCon.xTrimLen"
	"$zTrimLen"
	"$clipBarcodeCon.clipBarcode"
	"$addBarcodeToHeader"
	"$gzipOutput"
	"$barcodeDiagFile"
	"$rChar"
	#if $singlePaired.sPaired == "paired":
		"$singlePaired.barcodeReadPosCon.barcodeReadPos"
		#if $singlePaired.barcodeReadPosCon.barcodeReadPos == "BOTH":
			"$singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching"
			#if $singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.barcodeForSampleMatching == "BOTH":
				"$singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.redundantBarcode"
				"$singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcodeCon.strict"
				"$singlePaired.MpxData2"
			#else:
				"$singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.redundantBarcode"
				"$singlePaired.barcodeReadPosCon.barcodeForSampleMatchingCon.strict"
				"$singlePaired.MpxData2"
			#end if
		#else:
			"$singlePaired.barcodeReadPosCon.barcodeForSampleMatching"
			"$singlePaired.barcodeReadPosCon.redundantBarcode"
			"$singlePaired.barcodeReadPosCon.strict"
			"$singlePaired.MpxData2"
		#end if
	#else:
		"$singlePaired.barcodeReadPos"
		"$singlePaired.barcodeForSampleMatching"
		"$singlePaired.redundantBarcode"
		"$singlePaired.strict"
		"$singlePaired.MpxData2"
	#end if
	</command>
	<inputs>		
		<!-- First (or only) FASTQ input; datasets of format gz or fastq are accepted. -->
		<param type="data" format="gz,fastq" name="MpxData1" label="Compressed (or not) FASTQ file" />
		<!--
		  Single-end / paired-end switch. Whatever is selected, the five names
		  MpxData2, barcodeReadPos, barcodeForSampleMatching, redundantBarcode and
		  strict exist at some level of this conditional (as hidden placeholders
		  where the user has no choice), because the command template reads all
		  five of them in every branch.
		-->
		<conditional name="singlePaired">
			<param name="sPaired" type="select" label="Is this library mate-paired?">
				<option value="single">Single-end</option>
				<option value="paired">Paired-end</option>
			</param>
			<!-- Single-end: placeholders only. MpxData2 is the literal string "single", the others are "none". -->
			<when value="single">
				<param name="MpxData2" type="hidden" value="single" />
				<param name="barcodeReadPos" type="hidden" value="none" />
				<param name="redundantBarcode" type="hidden" value="none" />
				<param name="barcodeForSampleMatching" type="hidden" value="none" />
				<param name="strict" type="hidden" value="none" />
			</when>
			<when value="paired">
				<!-- Second FASTQ input of the pair. -->
				<param name="MpxData2" type="data" format="gz,fastq" label="Compressed (or not) FASTQ file" />
				<!--
				  Where the barcodes are located. For READ_1 and READ_2 the remaining
				  options are fixed to hidden values (redundantBarcode=true,
				  barcodeForSampleMatching=BOTH, strict=false); only BOTH exposes
				  further choices to the user.
				-->
				<conditional name="barcodeReadPosCon">
					<param name="barcodeReadPos" type="select" label="Barcode read position (BPOS)" help="where are the barcodes">
						<option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
						<option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
						<option value="BOTH">BOTH (beginning of both reads)</option>
					</param>
					<when value="READ_1">
						<param name="redundantBarcode" type="hidden" value="true" />
						<param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
						<param name="strict" type="hidden" value="false" />
					</when>
					<when value="READ_2">
						<param name="redundantBarcode" type="hidden" value="true" />
						<param name="barcodeForSampleMatching" type="hidden" value="BOTH" />
						<param name="strict" type="hidden" value="false" />
					</when>
					<when value="BOTH">
						<!--
						  Which barcode resolves the sample. READ_1 and READ_2 fix
						  redundantBarcode=true and strict=false; BOTH lets the user
						  decide whether the two barcodes are redundant.
						-->
						<conditional name="barcodeForSampleMatchingCon">
							<param name="barcodeForSampleMatching" type="select" label="Barcode for sample matching (BM)" help="which barcode should be used for sample look up (BM option)." >
								<option value="READ_1" selected="true">READ_1 (beginning of read from the first fastq file)</option>
								<option value="READ_2">READ_2 (beginning of read from the second fastq file)</option>
								<option value="BOTH">BOTH (beginning of both reads)</option>
							</param>
							<when value="READ_1">
								<param name="redundantBarcode" type="hidden" value="true" />
								<param name="strict" type="hidden" value="false" />
							</when>
							<when value="READ_2">
								<param name="redundantBarcode" type="hidden" value="true" />
								<param name="strict" type="hidden" value="false" />
							</when>
							<when value="BOTH">
								<!-- "strict" is user-selectable only when the barcodes are redundant; otherwise it is a hidden "false". -->
								<conditional name="redundantBarcodeCon">
									<param name="redundantBarcode" type="select" label="Redundant barcodes (BRED)" help="are the barcode REDUNDANT i.e. do they both resolve to the same sample (BRED option).">
										<option value="true">True</option>
										<option value="false" selected="true">False</option>
									</param>
									<when value="true">
										<param name="strict" type="select" label="Strict (S)" help="tells whether both barcodes should resolve to the same sample." >
											<option value="true" selected="true">True</option>
											<option value="false">False</option>
										</param>
									</when>
									<when value="false">
										<param name="strict" type="hidden" value="false" />
									</when>
								</conditional>
							</when>
						</conditional>
					</when>
				</conditional>
			</when>	    
		</conditional>
	  
		<!--
		  Source of the barcode set: a history dataset of format "bs" or a list
		  pasted into a text area. The alternative that is not in use is declared
		  as a hidden parameter with value "none", so both "barcodes" and
		  "barcode_list" always resolve when the command template reads them.
		-->
		<conditional name="bsinputtype">		
			<param name="bsinputtype_selector" type="select" label="Barcode set input type" help="You can either submit a barcode file or paste the list in a text field. Note: one sample per line.">
				<option value="bs_file" selected="true">Use a .bs tab-delimited file in the history</option>
				<option value="bs_textfield">Paste the barcodes list in a text field within the form</option>
			</param>
			<when value="bs_file">
				<param name="barcodes" type="data" format="bs" label="Barcode Set" />
				<param name="barcode_list" type="hidden" value="none" />
			</when>
			<when value="bs_textfield">
				<!-- The label below carries entity-escaped HTML (a span styled color:brown); do not re-encode it. -->
				<param name="barcode_list" type="text" area="True" size="10x30" label="Barcode Set (&#60;span style=&#34;color:brown;&#34;&#62;one sample per line: &#38;#60&#59;sample_name&#38;#62&#59;&#38;#60&#59;tab or space&#38;#62&#59;&#38;#60&#59;barcode&#38;#62&#59;&#60;/span&#62;)"/>
				<param name="barcodes" type="hidden" value="none" />
			</when>		
		</conditional>	
	  
		<!--
		  Barcode matching and trimming options. They are free-text rather than
		  integer fields, presumably because the underlying options also accept
		  the two-value "X:Z" form described in the help section.
		-->
		<param name="bcodelen" type="text" value="6" label="Barcode Length (LEN)" />

		<param name="qualityFormat" type="select" label="Fastq Quality Format (V)" help="if you need other quality coding format, contact the galaxy administrators.">
			<option value="Standard" selected="true">Fastq - Illumina Casava V1.8 with Sanger coding quality (phred scaling + 33)</option>
			<option value="Illumina">Fastq - Illumina v1.3 or above coding of quality (phred scaling + 64)</option>
			<option value="Solexa">Fastq - Solexa-style quality (solexa scaling + 66)</option>
		</param>
		<param name="maxMismatches" type="text" value="1" label="Maximum Mismatches (MM)" help="maximum mismatches for a barcode to be considered a match." />
		<param name="minBaseQuality" type="text" value="10" label="Minimum base quality (Q)" help="any barcode bases falling below this quality will be considered a mismatch even if the bases match." />
		<param name="minMismatchingDelta" type="text" value="1" label="Minimum mismatch difference (MMD)" help="Minimum difference between number of mismatches in the best and second best barcodes for a barcode to be considered a match." />
		<param name="zTrimLen" type="text" value="0" label="Extra number of bases to be trimmed from the barcode end (ZT)" />
		<!--
		  xTrimLen is passed on the command line whether or not clipping is
		  enabled, hence the hidden placeholder in the "false" branch.
		-->
		<conditional name="clipBarcodeCon">
			<param name="clipBarcode" type="select" label="Remove barcode sequence from read (C)" >
				<option value="true" selected="true">True</option>
				<option value="false">False</option>
			</param>
			<when value="true">
				<param name="xTrimLen" type="text" value="1" label="Extra number of bases to be trimmed right after the barcode (XT)" />
			</when>
			<when value="false">
				<param name="xTrimLen" type="hidden" value="1" />
			</when>
		</conditional>
		<param name="addBarcodeToHeader" type="select" label="Add matched barcode at the end of the read header (ADD)" >
			<option value="true" selected="true">True</option>
			<option value="false">False</option>
		</param>
		<param name="gzipOutput" type="select" label="Compress output (GZ)" >
			<option value="true" selected="true">True</option>
			<option value="false">False</option>
		</param>
		<!-- "False" stays the default; it is now marked explicitly instead of carrying selected="false". -->
		<param name="barcodeDiagFile" type="select" label="Output barcode match reporting file (DIAG)" >
			<option value="false" selected="true">False</option>
			<option value="true">True</option>
		</param>
		<!--
		  The numeric option values are passed as-is to the wrapper script.
		  NOTE(review): the first option's label is a single space character, so
		  it may render as a blank entry; confirm the intended wording against
		  jemultiplexer.py before relabelling it.
		-->
		<param name="rChar" type="select" label="Replace white space in the read name/header with the specified symbol (RCHAR)" >
			<option value="1" selected="true"> </option>
			<option value="2">:</option>
			<option value="3">_</option>
			<option value="4">-</option>
		</param>
	</inputs>
	<outputs>
		<!-- The only declared output: an HTML dataset named output1. -->
		<data name="output1" format="html" label="Demultiplexing stats on ${on_string}" />
		<!--
		  Kept on purpose as a reference example of switching the output format:
		  <data format="fastqsanger" name="output1" metadata_source="MpxData1" label="Demultiplexing stats on ${on_string}">
		    <change_format>
		      <when input="MpxData1.ext" value="fastqillumina" format="fastqillumina" />
		    </change_format>
		  </data>
		-->
	</outputs>
	
	<tests>
		<!--
		  Paired-end scenario with barcodes on both reads, both used for sample
		  matching and declared non-redundant (barcodeReadPos=BOTH,
		  barcodeForSampleMatching=BOTH, redundantBarcode=false). "strict" is not
		  set here; in that branch it is a hidden parameter with value "false".
		  The barcode set comes from a file and the expected output is result.html.
		-->
		<test>
			<param name="MpxData1" value="C1WLBACXX_lane7_1_sequence.txt" />
			<param name="MpxData2" value="C1WLBACXX_lane7_2_sequence.txt" />
			<param name="sPaired" value="paired" />
			<param name="bsinputtype_selector" value="bs_file" />
			<param name="barcodes" value="correct_barcodes_PE_both-ends_with_fnames.txt" />
			<param name="barcodeReadPos" value="BOTH" />
			<param name="barcodeForSampleMatching" value="BOTH" />
			<param name="redundantBarcode" value="false" />
			<param name="bcodelen" value="6" />
			<param name="qualityFormat" value="Standard" />
			<param name="maxMismatches" value="3" />
			<param name="minBaseQuality" value="20" />
			<param name="minMismatchingDelta" value="2" />
			<param name="zTrimLen" value="0" />
			<param name="clipBarcode" value="true" />
			<param name="xTrimLen" value="1" />
			<param name="addBarcodeToHeader" value="true" />
			<param name="gzipOutput" value="true" />
			<param name="barcodeDiagFile" value="true" />
			<param name="rChar" value="1" />
			<output name="output1" file="result.html" ftype="html"/>
		</test>
	</tests>
	
	<help>

**What it does**

Jemultiplexer : A fastq files demultiplexer with many neat options. Input files are fastq files, and can be in gzip compressed format (end in .gz).

Author: Charles Girardot  (charles.girardot@embl.de).

Version: 1.0.4

------

**Know what you are doing**

.. class:: warningmark

You will want to read the `documentation`__.

 .. __: http://gbcs.embl.de/jemultiplexer

------

**Jemultiplexer parameter list**

This is an exhaustive list of Jemultiplexer options::

  FASTQ_FILE1=File
  F1=File                     Input fastq file (optionally gzipped) for single end data, or first read in paired end data. 
                              Required. 

  FASTQ_FILE2=File
  F2=File                     Input fastq file (optionally gzipped) for the second read of paired end data. 
                              Default value: null. 

  BARCODE_FILE=File
  BF=File                     Barcode file describing sequence list and sample names. Tab-delimited file with 2 
                              columns, with the sample in col1 and the corresponding barcode in col2.
                              Note, make sure one sample per line.
                              If multiple barcodes map to the same sample, barcodes can be combined using the OR operator '|'.
                              e.g. a barcode file can be written like
                               		sample1	ATAT|GAGG
                              		sample2	CCAA|TGTG
                              Finally, for the special situation of paired-end data in which barcodes differ at both 
                              ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1 
                              and read_2 can be distinguished using a ':' separator i.e. 
                              		sample1	ATAT:GAGG
                              		sample2	CCAA:TGTG
                              Here understand that sample 1 is encoded with ATAT barcode at read_1 AND GAGG barcode at 
                              read_2. Note that you can still combine barcodes using | e.g. 
                              		sample1	ATAT|GAGG:CCAA|TGTG
                              would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1 
                              AND CCAA OR TGTG at read_2.
                              Required. 

  BARCODE_READ_POS=BarcodePosition
  BPOS=BarcodePosition        For paired-end data, where to expect the barcode(s) : READ_1 (beginning of read from 
                              FASTQ_FILE_1), READ_2 (beginning of read from FASTQ_FILE_2), BOTH (beginning of both 
                              reads). Automatically set to READ_1 in single end mode.
                              Default value: BOTH. This option can be set to 'null' to clear the default value. 
                              Possible values: {READ_1, READ_2, BOTH} 

  REDUNDANT_BARCODES=Boolean
  BRED=Boolean                For paired-end data and when BARCODE_READ_POS == BOTH, this option indicates if both 
                              read's barcodes encode redundant information, which is the usual situation 
                              (REDUNDANT_BARCODES=true) i.e. barcodes are supposed to be the same at both ends or to 
                              resolve to the same sample (when a pool of barcodes has been used for each sample).
                              When REDUNDANT_BARCODES=false, the 2 barcodes potentially encode different 
                              information. For example, only one of the barcodes encodes the sample the read belongs to 
                              while the second barcode might be a random barcode to tell apart PCR artefacts from real 
                              duplicates. Another example is when both barcodes should be used  in a combined fashion 
                              to resolve the sample. In the first example, you should use BPOS=BOTH BRED=false 
                              BM=READ_1 while in the second example, you should have BPOS=BOTH BRED=false BM=BOTH (note 
                              that with BPOS=BOTH BRED=true BM=BOTH, the behavior would be different as Jemultiplexer 
                              would then check the STRICT option to perform sample resolution).
                              Importantly, when BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, LEN, barcode 
                              matching options (MM, MMD, Q) and read trimming/clipping options (XT, ZT) accept 
                              different values for both barcodes in the form X:Z where X and Z are 2 integers.
                              Default value: true. This option can be set to 'null' to clear the default value. 
                              Possible values: {true, false} 

  BARCODE_FOR_SAMPLE_MATCHING=BarcodePosition
  BM=BarcodePosition          Automatically set to READ_1 in single end mode. 
                              For paired-end data and when BARCODE_READ_POS == BOTH, which barcode should be used to 
                              resolve sample :
                              	- use BM=READ_1 (beginning of read from FASTQ_FILE_1) if only this read should be used 
                              for sample matching,
                              	- use BM=READ_2 (beginning of read from FASTQ_FILE_2) if only this read should be used 
                              for sample matching,
                              	- use BM=BOTH (beginning of both reads) if both should be used ; when BM=BOTH, the 
                              behaviour of Jemultiplexer is different based on the value of REDUNDANT_BARCODES.
                              		If REDUNDANT_BARCODES=true, the two barcodes are considered to map to the same sample 
                              and Jemultiplexer uses the two barcodes according to the STRICT value.
                              		If REDUNDANT_BARCODES=false, the barcode file should map a couple of barcode to each 
                              sample (e.g. sample1 => AGAGTG:TTGATA) and Jemultiplexer needs both barcodes to find the 
                              relevant sample. Note that this is the only situation in which all barcode matching 
                              options (MM, MMD, Q) accept different values for both barcodes in the form X:Z where X 
                              and Z are 2 integers.
                              Default value: BOTH. This option can be set to 'null' to clear the default value. 
                              Possible values: {READ_1, READ_2, BOTH} 

  STRICT=Boolean
  S=Boolean                   For paired-end data and when BARCODE_READ_POS == BOTH and BM=BOTH, tells whether both 
                              barcodes should resolve to the same sample. When true and if only one of the two reads 
                              has a barcode match, the read pair is ignored. When false and if only one of the two 
                              reads has a barcode match, the read pair is assigned to the corresponding sample ; in 
                              cases where reads resolve to different samples, the read pair is ignored.
                              Default value: false. This option can be set to 'null' to clear the default value. 
                              Possible values: {true, false} 

  BCLEN=String
  LEN=String                  Length of the barcode sequences, optional. Taken from barcode file when not given.
                              In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct 
                              length can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing 
                              the barcode length for read_1 and read_2 respectively.
                              Default value: null. 

  MAX_MISMATCHES=String
  MM=String                   Maximum mismatches for a barcode to be considered a match. MM=null is like MM=0
                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH 
                              (note that most likely BRED=false as it does not make great sense otherwise), two 
                              distinct values can be given here using the syntax MM=X:Z where X and Z are 2 integers to 
                              use for read_1 and read_2 respectively.
                              Default value: 1. This option can be set to 'null' to clear the default value. 

  MIN_MISMATCH_DELTA=String
  MMD=String                  Minimum difference between number of mismatches in the best and second best barcodes for 
                              a barcode to be considered a match. MMD=null is like MMD=0
                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH 
                              (note that most likely BRED=false as it does not make great sense otherwise), two 
                              distinct values can be given here using the syntax MMD=X:Z where X and Z are 2 integers 
                              to use for read_1 and read_2 respectively.
                              Default value: 1. This option can be set to 'null' to clear the default value. 

  MIN_BASE_QUALITY=String
  Q=String                    Minimum base quality. Any barcode bases falling below this quality will be considered a 
                              mismatch even if the bases match. Q=null is like Q=0.
                              In situations where both barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH 
                              (note that most likely BRED=false as it does not make great sense otherwise), two 
                              distinct values can be given here using the syntax Q=X:Z where X and Z are 2 integers to 
                              use for read_1 and read_2 respectively.
                              Default value: 10. This option can be set to 'null' to clear the default value. 

  XTRIMLEN=String
  XT=String                   Extra number of base to be trimmed right after the barcode (only used if 
                              CLIP_BARCODE=true). Default is 1 as an extra 'T' (or 'A' depending how you see it) is 
                              added for barcode ligation but this default will be adapted according to the rules 
                              below. XT=null is like XT=0.
                              When running paired-end, two distinct values can be given using the syntax XT=X:Z where X 
                              and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when 
                              BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode so as to 
                              end up with reads of the same length (note that this can also be operated using ZT). If a 
                              unique value is given, e.g. XT=1, while running paired-end the following rule applies : 
                              (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode ; (2) 
                              BPOS=BOTH, the value is used for both reads.
                              Default value: 1. This option can be set to 'null' to clear the default value. 

  ZTRIMLEN=String
  ZT=String                   Extra number of bases to be trimmed from the barcode end i.e. 3' end. Pretty handy when a 
                              pipeline is set and you already know you'll trim read at a given size. ZT=null is like 
                              ZT=0.
                              When running paired-end, two distinct values can be given here using the syntax ZT=X:Z 
                              where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even 
                              when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode 
                              so as to end up with reads of the same length (note that this can also be operated using 
                              XT). Note that if a single value is passed, the value always applies to both reads in 
                              paired-end mode without further consideration.
                              Default value: 0. This option can be set to 'null' to clear the default value. 

  CLIP_BARCODE=Boolean
  C=Boolean                   Remove barcode sequence from read, as well as XTRIMLEN (and ZTRIMLEN) bases if 
                              applicable, before writing to output file. If false, reads are written without 
                              modification to output file. Apply to both barcodes when BPOS=BOTH.
                              Default value: true. This option can be set to 'null' to clear the default value. 
                              Possible values: {true, false} 

  ADD_BARCODE_TO_HEADER=Boolean
  ADD=Boolean                 Add matched barcode at the end of the read header. Apply to both barcodes when BPOS=BOTH.
                              If true, the string ':barcode' is added at the end of the read header with a ':' added 
                              only if current read header does not end with ':'.
                              If both reads of the pair have a barcode (i.e. BARCODE_READ_POS == BOTH), then the second 
                              read also has its own matched barcode written. Else, the read without a barcode receives 
                              the barcode from the barcoded read.
                              For example :
                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
                              becomes
                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE'
                              Default value: true. This option can be set to 'null' to clear the default value. 
                              Possible values: {true, false} 

  QUALITY_FORMAT=FastqQualityFormat
  V=FastqQualityFormat        A value describing how the quality values are encoded in the fastq.  Either 'Solexa' for 
                              pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and 
                              above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift 
                              of 33.  If this value is not specified (or 'null' is given), the quality format will be 
                              detected automatically.
                              Default value: Standard. This option can be set to 'null' to clear the default value. 
                              Possible values: {Solexa, Illumina, Standard} 

  GZIP_OUTPUTS=Boolean
  GZ=Boolean                  Compress output s_l_t_barcode.txt files using gzip and append a .gz extension to the filenames.
                              Default value: true. This option can be set to 'null' to clear the default value. 
                              Possible values: {true, false} 
  BARCODE_DIAG_FILE=String
  DIAG=String                 Name for a barcode match reporting file (not generated by default). Either a name 
                              (in which case the file will be created in the output dir) or full path. This file will contain 
                              a line per read pair with the barcode best matching the read subsequence or 'null' when no match 
                              is found according to matching parameters and the final selected sample. This file is useful for 
                              debugging or further processing in case both ends are barcoded.
                              Default value: null

  READ_NAME_REPLACE_CHAR=String
  RCHAR=String                Replace spaces in read name/header using provided character. This is particularly handy 
                              when you need to retain the ADDed barcode in read name/header during mapping (everything 
                              after space in read name is usually clipped in BAM files). For example, with RCHAR=':' :
                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:'
                              becomes
                              		'@D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE'
                              Default value: null. 
	</help>
</tool>