view genap2_kb_python/kb_count.xml @ 2:4da457a2c5dc draft

Uploaded
author fwuennemann
date Tue, 20 Apr 2021 21:51:26 +0000
parents dbcb26e2a1db
children 1ecec9b9dde7
line wrap: on
line source

<tool id="kb_python" name="kb_python" version="@VERSION@+galaxy1">
	<description>performs gene and feature quantification on single-cell sequencing data.</description>
	<!-- macros.xml is imported here; presumably it supplies the @VERSION@ token and the
	     "index" and "tests" macros expanded further down (confirm against macros.xml). -->
	<macros>
		<import>macros.xml</import>
	</macros>
	<!-- Packages that must be available when the command runs. -->
	<requirements>
		<requirement version="@VERSION@" type="package">kb-python</requirement>
		<requirement version="1.0.0" type="package">jupyter</requirement>
	</requirements>

	<!-- Error detection rules: any positive or negative exit code, plus the
	     patterns "Error:", "Exception:" and "Exception :" in the tool output.
	     NOTE(review): the command tag below also sets detect_errors="exit_code";
	     how the two mechanisms combine differs between Galaxy releases, so confirm
	     that these regexes are actually applied on the target Galaxy version. -->
	<stdio>
		<exit_code range="1:" />
		<exit_code range=":-1" />
		<regex match="Error:" />
		<regex match="Exception:" />
		<regex match="Exception :" />
	</stdio>
	<command detect_errors="exit_code"><![CDATA[
		## Stage 1: working directories. ./index holds the reference files,
		## ./kb_outs receives everything written by kb count.
		mkdir ./index
		&& mkdir ./kb_outs
		## Both index sources below place their files under ./index with identical names.
		#set $index_path = './index'
		#if $refTranscriptSource.TranscriptSource == "history":
			## Reuse an index from the history by linking it to the fixed file names.
			&& ln -s '${refTranscriptSource.h_index.index_file}' './index/kb_ref.idx'
			&& ln -s '${refTranscriptSource.h_index.t2g_file}' './index/t2g.txt'
			#if $workflow == "lamanno" or $workflow == "nucleus":
				&& ln -s '${refTranscriptSource.h_index.history_lamanno.cdna_t2c}' './index/cdna_t2c.txt'
				&& ln -s '${refTranscriptSource.h_index.history_lamanno.intron_t2c}' './index/intron_t2c.txt'
			#end if
		#else if $refTranscriptSource.TranscriptSource == "built":
			## Build the index with kb ref; the workflow option is passed exactly once.
			&& kb ref -i ./index/kb_ref.idx
				-g ./index/t2g.txt
				-f1 ./index/cdna.fa
				--workflow $workflow
				#if $workflow == "lamanno" or $workflow == "nucleus":
					-f2 ./index/intron.fa
					-c1 ./index/cdna_t2c.txt
					-c2 ./index/intron_t2c.txt
				#end if
				## Positional inputs: genome FASTA and GTF, or the feature table for kite.
				#if $workflow != "kite":
					'${refTranscriptSource.s_index.genomic_fasta}'
					'${refTranscriptSource.s_index.genomic_gtf}'
				#else:
					'${refTranscriptSource.s_index.kite_table}'
				#end if
		#end if
		## Stage 2: quantification with kb count.
		&& kb count
		-t \${GALAXY_SLOTS:-1}
		-i $index_path/kb_ref.idx
		-g $index_path/t2g.txt
		#if $workflow == "lamanno" or $workflow == "nucleus":
			-c1 $index_path/cdna_t2c.txt
			-c2 $index_path/intron_t2c.txt
		#end if
		-x $technology
		## The whitelist parameter is declared inside the "optional" section,
		## so it has to be addressed through that section.
		#if $optional.whitelist:
			--whitelist '${optional.whitelist}'
		#end if
		${optional.multimap}
		${optional.report}
		--workflow $workflow
		## extra_dtype carries the literal flag to add, or "none" for no extra output.
		#if $extra_dtype != "none":
			$extra_dtype
		#end if
		-o ./kb_outs
		--cellranger
		## Dataset paths are quoted so unusual characters cannot break the command line.
		'$FASTQ1'
		'$FASTQ2'
		]]>
	</command>
	<inputs>
		<!-- Workflow name; the command section passes it to both kb ref and kb count
		     and uses it to decide which extra index files are needed. -->
		<param name="workflow"
		       label="Select the workflow you want to use:"
		       type="select"
		       multiple="false"
		       format="text"
		       help="Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite:10xFB` for 10x Genomics Feature Barcoding technology.">
			<option value="standard">standard</option>
			<option value="lamanno">lamanno</option>
			<option value="nucleus">nucleus</option>
			<option value="kite">kite</option>
		</param>
		<!-- Index source parameters (refTranscriptSource) are expanded from the "index" macro. -->
		<expand macro="index"/>
		<!-- Technology string handed to kb count through its x option. -->
		<param name="technology"
		       label="Select the scRNA-seq technology:"
		       type="select"
		       multiple="false"
		       format="text"
		       help="Choose the scRNA-seq technology used to generate the fastq data.">
			<option value="10XV1">10XV1</option>
			<option value="10XV2">10XV2</option>
			<option value="10XV3">10XV3</option>
			<option value="CELSEQ">CELSEQ</option>
			<option value="CELSEQ2">CELSEQ2</option>
			<option value="DROPSEQ">DROPSEQ</option>
			<option value="INDROPSV1">INDROPSV1</option>
			<option value="INDROPSV2">INDROPSV2</option>
			<option value="INDROPSV3">INDROPSV3</option>
			<option value="SCRUBSEQ">SCRUBSEQ</option>
			<option value="SURECELL">SURECELL</option>
		</param>
		<!-- Read files, passed to kb count as positional arguments in this order. -->
		<param name="FASTQ1" label="Select the R1 fastq file:" type="data" format="fastqsanger.gz" multiple="false"/>
		<param name="FASTQ2" label="Select the R2 fastq file:" type="data" format="fastqsanger.gz" multiple="false"/>
		<!-- The option values are the literal command line flags; "none" adds nothing. -->
		<param name="extra_dtype" type="select" label="Do you want any additional output data type beside CellRanger?">
			<option value="none" selected="true">No</option>
			<option value="--loom">Loom</option>
			<option value="--h5ad">H5ad</option>
		</param>
		<!-- Parameters in this section are referenced as optional.NAME in the command and filters. -->
		<section name="optional" title="Optional commands" expanded="false">
			<param name="whitelist"
			       type="data"
			       format="tsv,tabular"
			       optional="true"
			       label="Whitelist file"
			       help="Whitelisted barcodes to correct to. If not provided and bustools supports the technology, a pre-packaged whitelist is used. If not, the bustools whitelist command is used."/>
			<param name="multimap" type="boolean" optional="true" truevalue="--mm" falsevalue="" label="Include multi pseudoaligned reads?" help="Do you want to include reads that pseudoalign to multiple genes?"/>
			<param name="report" type="boolean" optional="true" truevalue="--report" falsevalue="" label="Create an html report?" help="If true, will create an html report with run statistics and basic plots." checked="false"/>
		</section>
	</inputs>
    <outputs>
		<!-- Count matrix files written because the command always requests CellRanger output. -->
		<data name="barcodes" label="cellranger barcodes" format="txt" from_work_dir="kb_outs/counts_unfiltered/cellranger/barcodes.tsv"/>
		<data name="genes" label="cellranger genes" format="txt" from_work_dir="kb_outs/counts_unfiltered/cellranger/genes.tsv"/>
		<data name="matrix" label="cellranger matrix" format="mtx" from_work_dir="kb_outs/counts_unfiltered/cellranger/matrix.mtx"/>
		<!-- JSON files collected from the kb count output directory. -->
		<data name="inspect" label="inspect_report" format="json" from_work_dir="kb_outs/inspect.json"/>
		<data name="runinfo" label="run info" format="json" from_work_dir="kb_outs/run_info.json"/>
		<data name="kbinfo" label="kb info" format="json" from_work_dir="kb_outs/kb_info.json"/>
		<!-- Index files are only exposed when this run built the index itself. -->
		<data name="kb_ref" label="kb_ref.idx" format="kallisto.idx" from_work_dir="index/kb_ref.idx">
			<filter>refTranscriptSource['TranscriptSource'] == "built"</filter>
		</data>
		<data name="t2g" label="t2g.txt" format="txt" from_work_dir="index/t2g.txt">
			<filter>refTranscriptSource['TranscriptSource'] == "built"</filter>
		</data>
		<!-- The command requests the c1/c2 files from kb ref for both the lamanno and
		     the nucleus workflow, so both workflows must pass the filter. -->
		<data name="cdna_t2c" label="-c1 cdna_t2c.txt" format="txt" from_work_dir="index/cdna_t2c.txt">
			<filter>refTranscriptSource['TranscriptSource'] == "built" and workflow in ("lamanno", "nucleus")</filter>
		</data>
		<data name="intron_t2c" label="-c2 intron_t2c.txt" format="txt" from_work_dir="index/intron_t2c.txt">
			<filter>refTranscriptSource['TranscriptSource'] == "built" and workflow in ("lamanno", "nucleus")</filter>
		</data>
		<!-- Output names must be unique. The h5ad output previously shared the name
		     "adata" with the loom output; "adata" is kept for the loom file (the later
		     of the two definitions) and the h5ad file gets its own name. -->
		<data name="adata_h5ad" label="adata.h5ad" format="h5ad" from_work_dir="kb_outs/counts_unfiltered/adata.h5ad">
			<filter>extra_dtype == "--h5ad"</filter>
		</data>
		<data name="adata" label="adata.loom" format="loom" from_work_dir="kb_outs/counts_unfiltered/adata.loom">
			<filter>extra_dtype == "--loom"</filter>
		</data>
		<!-- Only collected when the report option in the "optional" section is enabled. -->
		<data name="report" label="report.html" format="html" from_work_dir="kb_outs/report.html">
			<filter>optional["report"]</filter>
		</data>
	</outputs>
	<!-- Test cases are not defined in this file; they are expanded from the "tests" macro. -->
	<tests>
		<expand macro="tests"/>
	</tests>
	<help><![CDATA[
		This is a Galaxy wrapper for kallisto-bustools (kb) for quantification of different types of single-cell sequencing data. The main kallisto-bustools homepage can be found at:
		`kallisto-bustools <https://www.kallistobus.tools/>`_.

		The kb count command runs the kallisto and bustools programs. It can be used for pre-processing of data from a variety of single-cell RNA-seq technologies, 
		and for a number of different workflows (e.g. production of gene count matrices, RNA velocity analyses, etc.).
  
		The kb package is developed and maintained by the Pachterlab under the MIT License.
	  ]]></help>
	<citations>
		<!-- A doi citation takes the bare DOI; a full resolver URL cannot be looked up. -->
		<citation type="doi">10.1101/673285</citation>
	</citations>
</tool>