Mercurial > repos > bgruening > alevin
view alevin.xml @ 16:b0fe3e482cb7 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 6954ddebecfe9f0975f2c7e29193e4c99c550514
author | bgruening |
---|---|
date | Mon, 28 Oct 2024 17:50:35 +0000 |
parents | c9944a2600f1 |
children |
line wrap: on
line source
<tool id="alevin" name="Alevin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE_VERSION@">
    <description>Quantification and analysis of 3-prime tagged-end single-cell sequencing data</description>
    <!-- Tokens (@TOOL_VERSION@ etc.) and the xrefs / requirements / index / stranded_* / citations
         macros are all imported from macros.xml, which is not part of this file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!-- Cheetah template rendered to a single shell command chain:
         1. optionally build a salmon index from a history FASTA,
         2. stage (and decompress) the reads in the working directory,
         3. run "salmon alevin",
         4. post-process the outputs that Galaxy collects below. -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir ./index
        && mkdir ./output

        ## Step 1: use a pre-built index, or build one from the supplied transcript FASTA.
        #if $refTranscriptSource.TranscriptSource != "indexed":
            && salmon index
                -i ./index
                --kmerLen '${refTranscriptSource.s_index.kmer}'
                --gencode
                --transcripts '${refTranscriptSource.s_index.fasta}'
            #set $index_path = './index'
        #else
            #set $index_path = $refTranscriptSource.index.fields.path
        #end if

        ## Step 2: stage the reads as plain FASTQ files.
        ## Compressed inputs are copied first so the history datasets are never modified.
        #if $pairstraight.readselect == 'paired':
            ## NOTE(review): only file1 is inspected; file2 is assumed to use the same compression.
            #if $pairstraight.file1.is_of_type("fastq.gz"):
                && cp '${pairstraight.file1}' ./mate1.fastq.gz
                && gunzip ./mate1.fastq.gz
                && cp '${pairstraight.file2}' ./mate2.fastq.gz
                && gunzip ./mate2.fastq.gz
            #else if $pairstraight.file1.is_of_type("fastq.bz2"):
                && cp '${pairstraight.file1}' ./mate1.fastq.bz2
                && bzip2 -d ./mate1.fastq.bz2
                && cp '${pairstraight.file2}' ./mate2.fastq.bz2
                && bzip2 -d ./mate2.fastq.bz2
            #else:
                && ln -s '${pairstraight.file1}' ./mate1.fastq
                && ln -s '${pairstraight.file2}' ./mate2.fastq
            #end if
        #else:
            #if $pairstraight.unmatedreads.is_of_type("fastq.gz"):
                && cp '${pairstraight.unmatedreads}' ./unmate.fastq.gz
                && gunzip ./unmate.fastq.gz
            #else if $pairstraight.unmatedreads.is_of_type("fastq.bz2"):
                && cp '${pairstraight.unmatedreads}' ./unmate.fastq.bz2
                && bzip2 -d ./unmate.fastq.bz2
            #else:
                && ln -s '${pairstraight.unmatedreads}' ./unmate.fastq
            #end if
        #end if

        && ln -s '${tgmap}' ./alevinmap.tsv

        ## Step 3: run alevin. The library type string directly follows -l.
        && salmon alevin -l
        #if $pairstraight.readselect == 'paired':
            #if $pairstraight.libtype.strandedness == 'A'
                A
            #else
                ${pairstraight.libtype.orientation}${pairstraight.libtype.strandedness}
            #end if
            -i '$index_path'
            -1 ./mate1.fastq
            -2 ./mate2.fastq
        #else:
            '${pairstraight.libtype.strandedness}'
            -i '$index_path'
            ## The reads were already decompressed/symlinked above, so the plain file is passed.
            -r ./unmate.fastq
        #end if
        -o ./output
        -p "\${GALAXY_SLOTS:-4}"
        ${protocol_cond.protocol}
        #if $protocol_cond.protocol == '--indropV2'
            --w1 '${protocol_cond.w1}'
        #end if
        --tgMap ./alevinmap.tsv

        ## Advanced options: every parameter below lives in the "optional" section.
        #if $optional.whitelist:
            --whitelist '${optional.whitelist}'
        #end if
        #if $optional.numCellBootstraps:
            --numCellBootstraps '${optional.numCellBootstraps}'
        #end if
        #if $optional.forceCells:
            --forceCells '${optional.forceCells}'
        #end if
        #if $optional.expectCells:
            --expectCells '${optional.expectCells}'
        #end if
        #if $optional.mrna:
            --mrna '${optional.mrna}'
        #end if
        #if $optional.rrna:
            --rrna '${optional.rrna}'
        #end if
        ## Compared as a string so that an explicit 0 is still forwarded; an unset value renders as ''.
        #if str($optional.minScoreFraction) != '':
            --minScoreFraction '${optional.minScoreFraction}'
        #end if
        #if $optional.keepCBFraction:
            --keepCBFraction '${optional.keepCBFraction}'
        #end if
        ${optional.noDedup}
        #if 'dumpBfh' in $output_files:
            --dumpBfh
        #end if
        #if 'dumpFeatures' in $output_files:
            --dumpFeatures
        #end if
        #if 'dumpUmiGraph' in $output_files:
            --dumpUmiGraph
        #end if
        ${optional.dumpMtx}
        #if $optional.maxNumBarcodes:
            --maxNumBarcodes '${optional.maxNumBarcodes}'
        #end if
        #if $optional.freqThreshold:
            --freqThreshold '${optional.freqThreshold}'
        #end if

        ## Step 4: post-processing.
        ## && gunzip output/alevin/quants_tier_mat.gz -> the output is binary file
        ## Without --dumpMtx the helper script is run with -m; it presumably writes the
        ## quants_mat.tsv file that the quants_mat_tsv output collects (script not shown here).
        #if $optional.dumpMtx != "--dumpMtx":
            && python '$__tool_directory__/vpolo_convert.py' -m
        #else:
            && gunzip output/alevin/quants_mat.mtx.gz
        #end if
        #if 'dumpUmiGraph' in $output_files:
            && python '$__tool_directory__/vpolo_convert.py' -u
            && sh '$__tool_directory__/umiout.sh'
        #end if
        ## those gzip file include binary datasets
        ## #if $optional.numCellBootstraps:
        ##     && gunzip output/alevin/quants_mean_mat.gz
        ##     && gunzip output/alevin/quants_var_mat.gz
        ## #end if
        #if 'auxiliar_info' in $output_files
            && tar -zcvf aux_info.tar.gz output/aux_info
        #end if
    ]]></command>
    <inputs>
        <!-- Reference selection (refTranscriptSource conditional) comes from the "index" macro. -->
        <expand macro="index"/>
        <conditional name="pairstraight">
            <param name="readselect" label="Single or paired-end reads?" type="select">
                <option value="paired">Paired-end</option>
                <option value="unmated">Single-end</option>
            </param>
            <when value="paired">
                <param name="file1" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" label="Mate pair 1" help="CB+UMI raw sequence file(s)"/>
                <param name="file2" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" label="Mate pair 2" help="Read-sequence file(s)"/>
                <expand macro="stranded_pe"/>
            </when>
            <when value="unmated">
                <param name="unmatedreads" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" label="Unmated reads files"/>
                <expand macro="stranded_se"/>
            </when>
        </conditional>
        <!-- The option values are the literal alevin protocol flags; the command inserts them verbatim. -->
        <conditional name="protocol_cond">
            <param name="protocol" type="select" label="Type of single-cell protocol" help="In cases where single-cell protocol supports variable length cellbarcodes, alevin adds nucleotide padding to make the lengths uniform. Furthermore, the padding scheme ensures that there are no collisions added in the process.">
                <option value="--dropseq">DropSeq Single Cell protocol</option>
                <option value="--chromium">10x chromium v2 Single Cell protocol</option>
                <option value="--chromiumV3">10x chromium v3 Single Cell protocol</option>
                <option value="--gemcode">Gemcode v1 Single Cell protocol</option>
                <option value="--celseq">CEL-Seq Single Cell protocol</option>
                <option value="--celseq2">CEL-Seq2 Single Cell protocol</option>
                <option value="--sciseq3">Sci-RNA-seq3 protocol</option>
                <option value="--indropV2">InDrop v2 protocol</option>
                <option value="--splitSeqV1">SplitSeqV1 protocol</option>
                <option value="--splitSeqV2">SplitSeqV2 protocol</option>
            </param>
            <when value="--dropseq"/>
            <when value="--chromium"/>
            <when value="--chromiumV3"/>
            <when value="--gemcode"/>
            <when value="--celseq"/>
            <when value="--celseq2"/>
            <when value="--sciseq3"/>
            <when value="--indropV2">
                <param argument="--w1" type="text" value="" label="w1 adapters">
                    <sanitizer invalid_char="">
                        <valid initial="string.letters"/>
                    </sanitizer>
                    <!-- Anchored at both ends so the whole value, not just a prefix, must be A/C/G/T. -->
                    <validator type="regex" message="Only the characters A, T, G and C are allowed">^[ATGC]+$</validator>
                </param>
            </when>
            <when value="--splitSeqV1"/>
            <when value="--splitSeqV2"/>
        </conditional>
        <param name="tgmap" type="data" format="tsv,tabular" label="Transcript to gene map file" help="Tsv with no header, containing two columns mapping each transcript present in the reference to the corresponding gene (the first column is a transcript and the second is the corresponding gene)."/>
        <!-- Option values are referenced by name in the command template and in the output filters. -->
        <param name="output_files" type="select" multiple="true" display="checkboxes" label="Extra output files">
            <option value="salmon_log">Salmon Quant log file</option>
            <option value="fragment_length">Observed fragment length distribution</option>
            <option value="auxiliar_info">Auxiliar info files</option>
            <option value="dumpUmiGraph">Per cell level parsimonious Umi graph (--dumpUmiGraph)</option>
            <option value="dumpFeatures">Features used by the CB classification and their counts at each cell level (--dumpFeatures)</option>
            <option value="dumpBfh">Full CB-EC-UMI-count data-structure (--dumpBfh)</option>
            <option value="commands">Commands info file</option>
        </param>
        <section name="optional" title="Advanced options" expanded="false">
            <param argument="--whitelist" type="data" format="tsv,tabular" optional="true" label="Whitelist file" help="Explicitly specify whitelist CB for cell detection and CB sequence correction. If not specified, putative CBs are generated."/>
            <param argument="--noDedup" type="boolean" truevalue="--noDedup" falsevalue="" checked="false" label="Skip deduplication step" help="Causes pipeline to only perform CB correction, then maps the read-sequences to the transcriptome generating the interim data-structure of CB-EqClass-UMI-count. Used in parallel with --dumpBarcodeEq or --dumpBfh for the purposes of obtaining raw information or debugging."/>
            <param argument="--mrna" type="data" format="tsv" optional="true" label="Mito-RNA genes file" help="Single column tsv of mitochondrial genes which are to be used as a feature for CB whitelisting naive Bayes classification."/>
            <param argument="--rrna" type="data" format="tsv" optional="true" label="Ribosomal RNA file" help="Single column tsv of ribosomal genes which are to be used as a feature for CB whitelisting naive Bayes classification."/>
            <param argument="--dumpMtx" type="boolean" truevalue="--dumpMtx" falsevalue="" checked="false" label="Dump cell v transcripts count matrix in MTX format" help="Converts the default binary format of alevin for gene-count matrix into a human readable mtx (matrix market exchange) sparse format."/>
            <param argument="--forceCells" type="integer" min="0" optional="true" label="Number of cells" help="Explicitly specify the number of cells."/>
            <param argument="--expectCells" type="integer" min="0" optional="true" label="Upper bound on expected number of cells" help="define a close upper bound on expected number of cells."/>
            <param argument="--numCellBootstraps" type="integer" min="0" value="0" optional="true" label="Generate mean and variance for cell x gene matrix by bootstrap" help="Performs certain number of bootstrap and generate the mean and variance of the count matrix"/>
            <param argument="--minScoreFraction" type="float" optional="true" label="Minimum allowed score for a mapping" help="This value controls the minimum allowed score for a mapping to be considered valid. It matters only when --validateMappings has been passed to Salmon. The maximum possible score for a fragment is ms = read_len * ma (or ms = (left_read_len + right_read_len) * ma for paired-end reads). The argument to --minScoreFraction determines what fraction of the maximum score s a mapping must achieve to be potentially retained. For a minimum score fraction of f, only mappings with a score greater than (f * s) will be kept. Mappings with lower scores will be considered as low-quality, and will be discarded."/>
            <param argument="--keepCBFraction" type="float" min="0" max="1" optional="true" label="Fraction of cellular barcodes to keep" help="Use 1 to quantify all"/>
            <param argument="--maxNumBarcodes" type="integer" min="0" value="100000" label="Maximum allowable limit to process the cell barcodes" help="Default: 100000"/>
            <param argument="--freqThreshold" type="integer" min="0" value="10" optional="true" label="Minimum frequency for a barcode to be considered" help="Default: 10"/>
        </section>
    </inputs>
    <outputs>
        <!-- Exactly one of the two count-matrix outputs below is produced, depending on the dumpMtx switch. -->
        <data name="quants_mat_tsv" label="${tool.name} on ${on_string}: per-cell gene-count matrix (tabular)" format="txt" from_work_dir="quants_mat.tsv">
            <filter>optional["dumpMtx"] is not True</filter>
        </data>
        <data name="quants_mat_mtx" label="${tool.name} on ${on_string}: per-cell gene-count matrix (MTX)" format="mtx" from_work_dir="output/alevin/quants_mat.mtx">
            <filter>optional["dumpMtx"]</filter>
        </data>
        <!-- Outputs without a filter are always collected. -->
        <data name="quants_mat_gz" label="${tool.name} on ${on_string}: per-cell level gene-count matrix (binary)" format="gz" from_work_dir="output/alevin/quants_mat.gz"/>
        <data name="quants_mat_cols_txt" label="${tool.name} on ${on_string}: column headers (gene-ids)" format="txt" from_work_dir="output/alevin/quants_mat_cols.txt"/>
        <data name="quants_mat_rows_txt" label="${tool.name} on ${on_string}: row index (CB-ids)" format="txt" from_work_dir="output/alevin/quants_mat_rows.txt"/>
        <data name="quants_tier_mat" label="${tool.name} on ${on_string}: tier categorization" format="gz" from_work_dir="output/alevin/quants_tier_mat.gz"/>
        <data name="featureDump_txt" label="${tool.name} on ${on_string}: cell-level information (featureDump)" format="txt" from_work_dir="output/alevin/featureDump.txt"/>
        <data name="raw_cb_frequency_txt" label="${tool.name} on ${on_string}: raw CB classification frequencies" format="txt" from_work_dir="output/alevin/raw_cb_frequency.txt">
            <filter>output_files and 'dumpFeatures' in output_files</filter>
        </data>
        <data name="whitelist_txt" label="${tool.name} on ${on_string}: whitelist" format="txt" from_work_dir="output/alevin/whitelist.txt"/>
        <!-- Extra outputs, each enabled by the matching "Extra output files" checkbox. -->
        <data name="auxiliar_files" label="${tool.name} on ${on_string}: auxiliar info files" format="tgz" from_work_dir="aux_info.tar.gz">
            <filter>output_files and 'auxiliar_info' in output_files</filter>
        </data>
        <data name="salmon_quant_log" format="txt" label="${tool.name} on ${on_string}: Salmon log file" from_work_dir="output/logs/salmon_quant.log">
            <filter>output_files and 'salmon_log' in output_files</filter>
        </data>
        <data name="cmd_info" label="${tool.name} on ${on_string}: command info (JSON)" format="json" from_work_dir="output/cmd_info.json">
            <filter>output_files and 'commands' in output_files</filter>
        </data>
        <data name="flenDist_txt" format="txt" label="${tool.name} on ${on_string}: observed fragment length distribution" from_work_dir="output/libParams/flenDist.txt">
            <filter>output_files and 'fragment_length' in output_files</filter>
        </data>
        <data name="bfh_txt" label="${tool.name} on ${on_string}: full CB-EC-UMI-count data-structure" format="txt" from_work_dir="output/alevin/bfh.txt">
            <filter>output_files and 'dumpBfh' in output_files</filter>
        </data>
        <!-- Bootstrap outputs are only collected when a non-zero number of bootstraps was requested. -->
        <data name="quants_mean_mat" label="${tool.name} on ${on_string}: count matrix mean file" format="gz" from_work_dir="output/alevin/quants_mean_mat.gz">
            <filter>optional["numCellBootstraps"]</filter>
        </data>
        <data name="quants_var_mat" label="${tool.name} on ${on_string}: count matrix variance file" format="gz" from_work_dir="output/alevin/quants_var_mat.gz">
            <filter>optional["numCellBootstraps"]</filter>
        </data>
        <data name="quants_boot_rows_txt" label="${tool.name} on ${on_string}: bootstraps rows" format="txt" from_work_dir="output/alevin/quants_boot_rows.txt">
            <filter>optional["numCellBootstraps"]</filter>
        </data>
        <!-- PDFs are discovered in the "fixed" directory; presumably written there by umiout.sh (script not shown here). -->
        <collection name="umigraphs" type="list" label="${tool.name} on ${on_string}: UMI graph PDFs">
            <filter>output_files and 'dumpUmiGraph' in output_files</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.pdf" format="pdf" directory="fixed"/>
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: chromium protocol, MTX output, dumpFeatures only. -->
        <test expect_num_outputs="8">
            <conditional name="refTranscriptSource">
                <param name="TranscriptSource" value="history"/>
                <section name="s_index">
                    <param name="fasta" value="minitranscript.fa"/>
                </section>
            </conditional>
            <conditional name="pairstraight">
                <param name="readselect" value="paired"/>
                <param name="file1" value="fastqs/moreminifastq1.fastq.gz"/>
                <param name="file2" value="fastqs/moreminifastq2.fastq.gz"/>
                <param name="orientation" value="I"/>
                <param name="strandedness" value="SR"/>
            </conditional>
            <conditional name="protocol_cond">
                <param name="protocol" value="--chromium"/>
            </conditional>
            <param name="tgmap" value="minitxp.tsv"/>
            <param name="output_files" value="dumpFeatures"/>
            <section name="optional">
                <param name="keepCBFraction" value="1"/>
                <param name="freqThreshold" value="5"/>
                <param name="dumpMtx" value="true"/>
            </section>
            <output name="quants_mat_mtx" file="alevin_mat_01.mtx" compare="sim_size" ftype="mtx">
                <assert_contents>
                    <has_text text="MatrixMarket"/>
                    <has_n_columns n="5"/>
                    <has_n_lines n="105"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: as test 1 plus bootstrapping, which adds the three bootstrap outputs. -->
        <test expect_num_outputs="11">
            <conditional name="refTranscriptSource">
                <param name="TranscriptSource" value="history"/>
                <section name="s_index">
                    <param name="fasta" value="minitranscript.fa"/>
                </section>
            </conditional>
            <conditional name="pairstraight">
                <param name="readselect" value="paired"/>
                <param name="file1" value="fastqs/moreminifastq1.fastq.gz"/>
                <param name="file2" value="fastqs/moreminifastq2.fastq.gz"/>
                <param name="orientation" value="I"/>
                <param name="strandedness" value="SR"/>
            </conditional>
            <conditional name="protocol_cond">
                <param name="protocol" value="--chromium"/>
            </conditional>
            <param name="tgmap" value="minitxp.tsv"/>
            <param name="output_files" value="dumpFeatures"/>
            <section name="optional">
                <param name="keepCBFraction" value="1"/>
                <param name="numCellBootstraps" value="2"/>
                <param name="freqThreshold" value="5"/>
                <param name="dumpMtx" value="true"/>
            </section>
            <output name="quants_mat_mtx" file="alevin_mat_02.mtx" compare="sim_size" ftype="mtx">
                <assert_contents>
                    <has_text text="MatrixMarket"/>
                    <has_n_columns n="5"/>
                    <has_n_lines n="105"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: InDrop v2 protocol, which additionally requires the w1 adapter sequence. -->
        <test expect_num_outputs="8">
            <conditional name="refTranscriptSource">
                <param name="TranscriptSource" value="history"/>
                <section name="s_index">
                    <param name="fasta" value="minitranscript.fa"/>
                </section>
            </conditional>
            <conditional name="pairstraight">
                <param name="readselect" value="paired"/>
                <param name="file1" value="fastqs/moreminifastq1.fastq.gz"/>
                <param name="file2" value="fastqs/moreminifastq2.fastq.gz"/>
                <param name="orientation" value="I"/>
                <param name="strandedness" value="SR"/>
            </conditional>
            <conditional name="protocol_cond">
                <param name="protocol" value="--indropV2"/>
                <param name="w1" value="ATCAT"/>
            </conditional>
            <param name="tgmap" value="minitxp.tsv"/>
            <param name="output_files" value="dumpFeatures"/>
            <section name="optional">
                <param name="keepCBFraction" value="1"/>
                <param name="freqThreshold" value="5"/>
                <param name="dumpMtx" value="true"/>
            </section>
            <output name="quants_mat_mtx" file="alevin_mat_indropV2.mtx" compare="sim_size" ftype="mtx">
                <assert_contents>
                    <has_text text="MatrixMarket"/>
                    <has_n_columns n="5"/>
                    <has_n_lines n="105"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: every extra output enabled, including the UMI graph collection. -->
        <test expect_num_outputs="14">
            <conditional name="refTranscriptSource">
                <param name="TranscriptSource" value="history"/>
                <section name="s_index">
                    <param name="fasta" value="minitranscript.fa"/>
                </section>
            </conditional>
            <conditional name="pairstraight">
                <param name="readselect" value="paired"/>
                <param name="file1" value="fastqs/moreminifastq1.fastq.gz"/>
                <param name="file2" value="fastqs/moreminifastq2.fastq.gz"/>
                <param name="orientation" value="I"/>
                <param name="strandedness" value="SR"/>
            </conditional>
            <conditional name="protocol_cond">
                <param name="protocol" value="--chromium"/>
            </conditional>
            <param name="tgmap" value="minitxp.tsv"/>
            <param name="output_files" value="dumpFeatures,salmon_log,fragment_length,auxiliar_info,dumpUmiGraph,dumpBfh,commands"/>
            <section name="optional">
                <param name="dumpMtx" value="true"/>
            </section>
            <output name="quants_mat_mtx" file="alevin_mat.mtx" compare="sim_size" ftype="mtx">
                <assert_contents>
                    <has_text text="MatrixMarket"/>
                    <has_n_columns n="5"/>
                    <has_n_lines n="17"/>
                </assert_contents>
            </output>
            <output name="salmon_quant_log" ftype="txt">
                <assert_contents>
                    <has_text text="Index contained 322 targets"/>
                    <has_text text="Counted 14 total reads in the equivalence classes"/>
                </assert_contents>
            </output>
            <output name="flenDist_txt" file="length_distribution.txt" ftype="txt"/>
            <output name="auxiliar_files" ftype="tgz">
                <assert_contents>
                    <has_size value="1898" delta="100"/>
                </assert_contents>
            </output>
            <output_collection name="umigraphs" type="list" count="14">
                <element name="AGTGGGATCTTAACCT">
                    <assert_contents>
                        <has_size value="4661" delta="1000"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="bfh_txt" file="full_data_structure.txt" ftype="txt"/>
        </test>
    </tests>
    <help><![CDATA[
@salmonhelp@

@alevinhelp@
    ]]></help>
    <expand macro="citations"/>
</tool>