Mercurial > repos > iuc > cite_seq_count
diff cite_seq_count.xml @ 0:3df3d1b51110 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/cite_seq_count commit 70fe293f480c4d0e9f14d4373ecac4cbcf4fe17f
author | iuc |
---|---|
date | Wed, 18 Jan 2023 16:04:20 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cite_seq_count.xml Wed Jan 18 16:04:20 2023 +0000 @@ -0,0 +1,584 @@ +<tool name="CITE-seq-Count" id="cite_seq_count" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> + <description>Count CMO/HTO</description> + <macros> + <token name="@TOOL_VERSION@">1.4.4</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> + <edam_topics> + <edam_topic>topic_3170</edam_topic> + <edam_topic>topic_4028</edam_topic> + <edam_topic>topic_3308</edam_topic> + </edam_topics> + <xrefs> + <xref type="bio.tools">CITE-seq-Count</xref> + </xrefs> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">cite-seq-count</requirement> + <requirement type="package" version="3.7.12">python</requirement> + <requirement type="package" version="1.0.0">umi_tools</requirement> + <requirement type="package" version="0.20.7">python-levenshtein</requirement> + <requirement type="package" version="0.20.7">levenshtein</requirement> + <requirement type="package" version="0.25.3">pandas</requirement> + <requirement type="package" version="1.0.0">umi_tools</requirement> + <!-- The next dependencies are here to help conda to solve the environment --> + <requirement type="package" version="1.0.8">bzip2</requirement> + <requirement type="package" version="2.5.0">expat</requirement> + <requirement type="package" version="0.70.14">multiprocess</requirement> + <requirement type="package" version="1.21.6">numpy</requirement> + <requirement type="package" version="0.16">pysam</requirement> + <requirement type="package" version="1.7.3">scipy</requirement> + </requirements> + <stdio> + <exit_code range="1:" level="fatal"/> + </stdio> + <version_command>CITE-seq-Count --version</version_command> + <command><![CDATA[ +## This is taken from the star solo wrapper: +## Check that the input pairs are of the same type. +## We consume either repeats of two inputs R1 + R2 +## or a collection of paired reads. +#if str($input_types.use) == "repeat": + #assert len(str($input_types.input1).split(",")) == len(str($input_types.input2).split(",")) + #set $reads1 = $input_types.input1 + #set $reads2 = $input_types.input2 +#elif str($input_types.use) == "list_paired": + #set $reads1 = $input_types.input_collection.forward + #set $reads2 = $input_types.input_collection.reverse +#end if +CITE-seq-Count +--threads \${GALAXY_SLOTS:-4} +--read1 '$reads1' +--read2 '$reads2' +--tags '$tags' +## Chemistry: +#if str($params.chemistry) == "v2": +--cell_barcode_first_base 1 +--cell_barcode_last_base 16 +--umi_first_base 17 +--umi_last_base 26 +#else if str($params.chemistry) == "v3": +--cell_barcode_first_base 1 +--cell_barcode_last_base 16 +--umi_first_base 17 +--umi_last_base 28 +#else if str($params.chemistry) == "custom": +--cell_barcode_first_base $params.cell_barcode_first_base +--cell_barcode_last_base $params.cell_barcode_last_base +--umi_first_base $params.umi_first_base +--umi_last_base $params.umi_last_base +#end if +--bc_collapsing_dist $bc_collapsing_dist +--umi_collapsing_dist $umi_collapsing_dist +$no_umi_correction +--expected_cells $expected_cells +#if $whitelist: +--whitelist '$whitelist' +#end if +--max-error $max_error +#if str($start_trim) != "0": +--start-trim $start_trim +#end if +$sliding_window +$dense +#if str($first_n) != "0": +--first_n $first_n +#end if +#if str($unmapped.output) == "true": +--unknown-top-tags $unknown_top_tags +#end if + +## Outputs are gzip +&& gunzip Results/read_count/barcodes.tsv.gz +&& gunzip Results/read_count/features.tsv.gz +&& gunzip Results/read_count/matrix.mtx.gz +&& gunzip Results/umi_count/barcodes.tsv.gz +&& gunzip Results/umi_count/features.tsv.gz +&& gunzip Results/umi_count/matrix.mtx.gz +]]> +</command> + <inputs> + <conditional name="input_types" > + <param name="use" type="select" label="Input Type" > + <option value="repeat" >Separate barcode and CMO/HTO reads</option> + <option value="list_paired" >Paired collection of barcode and CMO/HTO reads</option> + </param> + <when value="repeat"> + <param format="fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true" + label="RNA-Seq FASTQ file, Barcode reads" /> + <param format="fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true" + label="RNA-Seq FASTQ file, HTO/CMO reads"/> + </when> + <when value="list_paired"> + <param name="input_collection" collection_type="paired" type="data_collection" format="fastq.gz,fastqsanger.gz" label="Collection of Pairs" /> + </when> + </conditional> + <param argument="--tags" type="data" format="csv" label="the CMO/HTO barcodes as well as their respective names" help="first column is the sequence and second column is the name" /> + <conditional name="params" > + <param name="chemistry" type="select" label="Configure Chemistry Options"> + <option value="v2" selected="true">Chromium Chemistry v2</option> + <option value="v3">Chromium Chemistry v3</option> + <option value="custom">Custom</option> + </param> + <when value="v2" /> + <when value="v3" /> + <when value="custom" > + <param argument="--cell_barcode_first_base" type="integer" min="1" value="1" label="Cell Barcode First Base" /> + <param argument="--cell_barcode_last_base" type="integer" min="1" value="16" label="Cell Barcode Last Base" /> + <param argument="--umi_first_base" type="integer" min="1" value="17" label="UMI First Base" /> + <param argument="--umi_last_base" type="integer" min="1" value="26" label="UMI Last Base" /> + </when> + </conditional> + <param argument="--bc_collapsing_dist" type="integer" min="0" value="1" label="How many errors are allowed between two cell barcodes to collapse them onto one cell." /> + <param argument="--umi_collapsing_dist" type="integer" min="0" value="2" label="How many errors are allowed between two umi within the same cell and TAG to collapse." /> + <param argument="--no_umi_correction" type="boolean" truevalue="--no_umi_correction" falsevalue="" checked="false" label="Deactivate UMI correction" /> + <param argument="--expected_cells" type="integer" min="1" value="3000" label="How many cells you expect in your run" /> + <param argument="--whitelist" type="data" format="txt,csv,tsv" label="Whitelist of cell barcodes" optional="true"/> + <param name="max_error" type="integer" min="0" value="2" label="Maximum Levenshtein distance allowed for CMO/HTO."/> + <param name="start_trim" type="integer" min="0" value="0" label="How many bases should be trimmed on CMO/HTO read before starting to map"/> + <param name="sliding_window" type="boolean" truevalue="--sliding-window" falsevalue="" checked="false" label="Activate sliding window alignement."/> + <param argument="--dense" type="boolean" truevalue="--dense" falsevalue="" checked="false" label="Output in tsv format (in addition to the dense format)"/> + <param argument="--first_n" type="integer" value="0" label="Select N reads to run on instead of all." /> + <conditional name="unmapped"> + <param name="output" type="select" label="Write table of unknown TAGs to file."> + <option value="false">No</option> + <option value="true">Yes</option> + </param> + <when value="false"/> + <when value="true"> + <param name="unknown_top_tags" type="integer" min="1" value="100" label="Top n unmapped TAGs to output."/> + </when> + </conditional> + </inputs> + <outputs> + <data name="report" format="yaml" label="${tool.name} on ${on_string}: report" from_work_dir="Results/run_report.yaml" /> + <data format="tsv" name="output_features" label="${tool.name} on ${on_string}: Features raw" + from_work_dir="Results/read_count/features.tsv" /> + <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw" + from_work_dir="Results/read_count/barcodes.tsv" /> + <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Features Counts raw" + from_work_dir="Results/read_count/matrix.mtx" /> + <data format="tsv" name="output_features_filtered" label="${tool.name} on ${on_string}: Features filtered" + from_work_dir="Results/umi_count/features.tsv" /> + <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered" + from_work_dir="Results/umi_count/barcodes.tsv" /> + <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Features Counts filtered (UMI)" + from_work_dir="Results/umi_count/matrix.mtx" /> + <data format="tsv" name="dense_output_matrix" label="${tool.name} on ${on_string}: Dense Matrix With UMI" + from_work_dir="Results/dense_umis.tsv" > + <filter>dense</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="7"> + <!-- test 1 --> + <param name="tags" value="tags.csv"/> + <param name="expected_cells" value="2"/> + <param name="whitelist" value="whitelist.txt"/> + <conditional name="input_types"> + <param name="use" value="repeat"/> + <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/> + <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/> + </conditional> + <conditional name="params" > + <param name="chemistry" value="v2"/> + </conditional> + <output name="report"> + <assert_contents> + <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/> + <has_line line="Reads processed: 200"/> + <has_line line="Uncorrected cells: 0"/> + </assert_contents> + </output> + <output name="output_barcodes"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+43"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + <output name="output_barcodes_filtered"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features_filtered"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix_filtered"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+36"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="8"> + <!-- test 2 --> + <!-- change input style use dense no whitelist--> + <param name="tags" value="tags.csv"/> + <param name="expected_cells" value="2"/> + <conditional name="input_types"> + <param name="use" value="list_paired" /> + <param name="input_collection" > + <collection type="paired"> + <element name="forward" value="correct_R1.fastq.gz" ftype="fastqsanger.gz" /> + <element name="reverse" value="correct_R2.fastq.gz" ftype="fastqsanger.gz" /> + </collection> + </param> + </conditional> + <conditional name="params" > + <param name="chemistry" value="v2"/> + </conditional> + <param name="dense" value="true"/> + <output name="report"> + <assert_contents> + <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/> + <has_line line="Reads processed: 200"/> + <has_line line="Uncorrected cells: 0"/> + </assert_contents> + </output> + <output name="output_barcodes"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="2"/> + </assert_contents> + </output> + <output name="output_features"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-2]\s+43"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + <output name="output_barcodes_filtered"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="2"/> + </assert_contents> + </output> + <output name="output_features_filtered"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix_filtered"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-2]\s+36"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + <output name="dense_output_matrix"> + <assert_contents> + <has_line_matching expression="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC\s+2[79]\s+2[79]"/> + <has_line_matching expression="test1-CGTAGCTCG-CGTAGCTCG\s+[23][56]\s+[23][56]"/> + <has_line_matching expression="unmapped\s+2[17]\s+2[17]"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="7"> + <!-- test 3 --> + <!-- custom chemistry max_error 5 --> + <param name="tags" value="tags.csv"/> + <param name="expected_cells" value="2"/> + <param name="whitelist" value="whitelist.txt"/> + <conditional name="input_types"> + <param name="use" value="repeat"/> + <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/> + <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/> + </conditional> + <conditional name="params" > + <param name="chemistry" value="custom"/> + <param name="cell_barcode_first_base" value="1"/> + <param name="cell_barcode_last_base" value="16"/> + <param name="umi_first_base" value="17"/> + <param name="umi_last_base" value="26"/> + </conditional> + <param name="max_error" value="5"/> + <output name="report"> + <assert_contents> + <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/> + <has_line line="Reads processed: 200"/> + <has_line line="Uncorrected cells: 0"/> + </assert_contents> + </output> + <output name="output_barcodes"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+68"/> + <has_n_lines n="7"/> + </assert_contents> + </output> + <output name="output_barcodes_filtered"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features_filtered"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix_filtered"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+52"/> + <has_n_lines n="7"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="7"> + <!-- test 4 --> + <!-- incompatible whitelist --> + <param name="tags" value="tags.csv"/> + <param name="expected_cells" value="2"/> + <param name="whitelist" value="incompatible_whitelist.txt"/> + <conditional name="input_types"> + <param name="use" value="repeat"/> + <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/> + <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/> + </conditional> + <conditional name="params" > + <param name="chemistry" value="v2"/> + </conditional> + <output name="report"> + <assert_contents> + <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/> + <has_line line="Reads processed: 200"/> + <has_line line="Uncorrected cells: 0"/> + </assert_contents> + </output> + <output name="output_barcodes"> + <assert_contents> + <has_line line="AAAAAAAAAAAAAAAA"/> + <has_line line="TTTTTTTTTTTTTTTT"/> + <has_n_lines n="2"/> + </assert_contents> + </output> + <output name="output_features"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix"> + <assert_contents> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_barcodes_filtered"> + <assert_contents> + <has_line line="AAAAAAAAAAAAAAAA"/> + <has_line line="TTTTTTTTTTTTTTTT"/> + <has_n_lines n="2"/> + </assert_contents> + </output> + <output name="output_features_filtered"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix_filtered"> + <assert_contents> + <has_n_lines n="3"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="7"> + <!-- test 5 --> + <!-- umi_collapsing_dist 5 --> + <param name="tags" value="tags.csv"/> + <param name="expected_cells" value="2"/> + <param name="whitelist" value="whitelist.txt"/> + <conditional name="input_types"> + <param name="use" value="repeat"/> + <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/> + <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/> + </conditional> + <conditional name="params" > + <param name="chemistry" value="v2"/> + </conditional> + <param name="umi_collapsing_dist" value="5"/> + <output name="report"> + <assert_contents> + <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/> + <has_line line="Reads processed: 200"/> + <has_line line="Uncorrected cells: 0"/> + </assert_contents> + </output> + <output name="output_barcodes"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+43"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + <output name="output_barcodes_filtered"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features_filtered"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix_filtered"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+10"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="7"> + <!-- test 6 --> + <!-- multiple fastqs --> + <param name="tags" value="tags.csv"/> + <param name="expected_cells" value="2"/> + <param name="whitelist" value="whitelist.txt"/> + <conditional name="input_types"> + <param name="use" value="repeat"/> + <param name="input1" value="correct_R1.fastq.gz,correct_R1.fastq.gz" ftype="fastqsanger.gz"/> + <param name="input2" value="correct_R2.fastq.gz,correct_R2.fastq.gz" ftype="fastqsanger.gz"/> + </conditional> + <conditional name="params" > + <param name="chemistry" value="v2"/> + </conditional> + <output name="report"> + <assert_contents> + <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/> + <has_line line="Reads processed: 400"/> + <has_line line="Uncorrected cells: 0"/> + </assert_contents> + </output> + <output name="output_barcodes"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+86"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + <output name="output_barcodes_filtered"> + <assert_contents> + <has_line line="TAGAGGGAAGTCAAGC"/> + <has_line line="TACATATTCTTTACTG"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_features_filtered"> + <assert_contents> + <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/> + <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/> + <has_line line="unmapped"/> + <has_n_lines n="3"/> + </assert_contents> + </output> + <output name="output_matrix_filtered"> + <assert_contents> + <has_line_matching expression="[1-2]\s+[1-4]\s+36"/> + <has_n_lines n="9"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +CITE-seq-Count is a program that outputs UMI and read counts from raw fastq CITE-seq or hashing data. + +Here is an image explaining the expected structure of read1 and read2 from the sequencer: + +.. image:: read_structure.png + :alt: Read1: --barcode--|--umi--|TTTT and Read2: --CMO/HTO--|AAA + +]]></help> + <citations> + <citation type="doi">10.5281/zenodo.2585469</citation> + </citations> +</tool>