Mercurial > repos > iuc > rna_starsolo
view rg_rnaStarSolo.xml @ 20:45795f582ae9 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 2b3fa63863a366beef057c7f75ccbcaf0c280151
author | iuc |
---|---|
date | Tue, 27 Aug 2024 14:11:53 +0000 |
parents | 5ef7ec16b04f |
children |
line wrap: on
line source
<tool id="rna_starsolo" name="RNA STARSolo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
    <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam"/>
    <xrefs>
        <xref type="bio.tools">star</xref>
    </xrefs>
    <expand macro="requirements"/>
    <expand macro="stdio" >
        <regex match="Segmentation fault" source="both" level="fatal" />
    </expand>
    <!-- Cheetah template for the job command line. Keep one directive or
         comment per line: a Cheetah comment runs to the end of its line, so
         the line breaks below are significant. Tokens written as @NAME@ are
         expanded from macros.xml. -->
    <command><![CDATA[
    @TEMPINDEX@
    STAR
    @REFGENOMEHANDLING@

    ## Supports Drop-seq, 10X Chromium, inDrop and Smart-Seq
    --soloType $sc.solo_type

    #if str($sc.solo_type) == "CB_UMI_Simple":
        @READSHANDLING@
        #if $sc.soloCBwhitelist:
            --soloCBwhitelist '$sc.soloCBwhitelist'
        #else
            --soloCBwhitelist None
        #end if
        ## 1 - check length of barcode, 0 - do not check
        ## Good for checking custom chemistries
        --soloBarcodeReadLength $sc.soloBarcodeReadLength
        #if str($sc.params.chemistry) == "Cv2":
            --soloCBstart 1
            --soloCBlen 16
            --soloUMIstart 17
            --soloUMIlen 10
        #else if str($sc.params.chemistry) == "Cv3":
            --soloCBstart 1
            --soloCBlen 16
            --soloUMIstart 17
            --soloUMIlen 12
        #else if str($sc.params.chemistry) == "custom":
            --soloCBstart $sc.params.soloCBstart
            --soloCBlen $sc.params.soloCBlen
            --soloUMIstart $sc.params.soloUMIstart
            --soloUMIlen $sc.params.soloUMIlen
            ## Barcode and cDNA on the same mate: clip the barcode bases
            ## from the mate that carries them
            #if $sc.params.bccdna_mate.bc_location == "same_mate":
                --soloBarcodeMate $sc.params.bccdna_mate.soloBarcodeMate
                #if $sc.params.bccdna_mate.soloBarcodeMate == "1":
                    --clip5pNbases $sc.params.bccdna_mate.clip_n_bases 0
                #else if $sc.params.bccdna_mate.soloBarcodeMate == "2":
                    --clip3pNbases 0 $sc.params.bccdna_mate.clip_n_bases
                #end if
            #end if
            --soloAdapterSequence '$sc.params.soloAdapterSequence'
            --soloAdapterMismatchesNmax $sc.params.soloAdapterMismatchesNmax
            --clipAdapterType $sc.params.clipAdapterType
        #end if
    #elif str($sc.solo_type) == "CB_UMI_Complex":
        @READSHANDLING@
        ## inDrop supports multiple cell barcodes of varying length
        #set $cb_whitelist = []
        #set $cb_pos = []
        #for $cb in $sc.cb_whitelists:
            #silent $cb_whitelist.append(str($cb.whitelist_file))
            #silent $cb_pos.append('_'.join([str($cb.cb_start_anchor), str($cb.cb_start_anchor_pos),str($cb.cb_end_anchor), str($cb.cb_end_anchor_pos)]))
        #end for
        #set $cb_whitelist = ' '.join($cb_whitelist)
        --soloCBwhitelist $cb_whitelist
        #set $cb_pos = ' '.join($cb_pos)
        --soloCBposition $cb_pos
        #set $umi_pos = '_'.join([str($sc.umi_start_anchor), str($sc.umi_start_anchor_pos), str($sc.umi_end_anchor), str($sc.umi_end_anchor_pos)])
        --soloUMIposition $umi_pos
        --soloAdapterSequence '$sc.soloAdapterSequence'
        --soloAdapterMismatchesNmax $sc.soloAdapterMismatchesNmax
        --clipAdapterType $sc.clipAdapterType
    #elif str($sc.solo_type) == "SmartSeq":
        ## Create a manifest file with fastq files and their corresponding cell-ids
        ## For Smart-Seq [R1] is followed by [R2]
        --readFilesManifest '$manifest_file'
        #set $read_files_command = ""
        ## Only the first element of the collection is inspected for compression
        #if str($sc.input_types_smart_seq.use) == "list_single_end":
            #if $sc.input_types_smart_seq.single_end_collection[0].is_of_type('fastq.gz', 'fastqsanger.gz'):
                @FASTQ_GZ_OPTION@
            #end if
        #elif str($sc.input_types_smart_seq.use) == "list_paired_end":
            #if $sc.input_types_smart_seq.paired_end_collection[0].forward.is_of_type('fastq.gz', 'fastqsanger.gz'):
                @FASTQ_GZ_OPTION@
            #end if
        #end if
        --soloCBwhitelist None
    #end if

    #if $solo.wasp_conditional.waspOutputMode == "wasp_mode":
        --waspOutputMode SAMtag --varVCFfile '$solo.wasp_conditional.varVCFfile'
    #end if

    --soloStrand $solo.soloStrand
    --soloFeatures $solo.soloFeatures
    --soloUMIdedup $sc.umidedup.soloUMIdedup
    #if str($sc.umidedup.soloUMIdedup) == "1MM_CR":
        --soloUMIfiltering $sc.umidedup.soloUMIfiltering
    #end if
    --quantMode TranscriptomeSAM $solo.quantModeGene

    ## The multi-select value is comma separated; STAR expects spaces
    #set $tag_names = str($solo.outSAMattributes).replace(',', ' ')
    --outSAMattributes $tag_names
    ## This condition is repeated further down to pick the matching BAM
    ## post-processing step; keep the two in sync
    #if "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None':
        --outSAMtype BAM SortedByCoordinate
    #else:
        --outSAMtype BAM Unsorted
    #end if

    #if str($solo.filter.filter_type) == "cellranger2":
        --soloCellFilter CellRanger2.2 $solo.filter.n_expected $solo.filter.max_perc $solo.filter.max_min_ratio
    #else if str($solo.filter.filter_type) == "emptydrops":
        --soloCellFilter EmptyDrops_CR $solo.filter.nExpectedCells $solo.filter.maxPercentile $solo.filter.maxMinRatio $solo.filter.indMin $solo.filter.indMax $solo.filter.umiMin $solo.filter.umiMinFracMedian $solo.filter.candMaxN $solo.filter.FDR $solo.filter.simN
    #else if str($solo.filter.filter_type) == "topcells":
        --soloCellFilter TopCells $solo.filter.n_cells
    #else if str($solo.filter.filter_type) == "no_filter":
        --soloCellFilter None
    #end if
    ## Splice junctions are always under "raw" directory

    --soloOutFormatFeaturesGeneField3 '${solo.soloOutFormatFeaturesGeneField3}'
    ## Unmapped
    $solo.outSAMunmapped
    ## Read MAPQ
    --outSAMmapqUnique ${solo.outSAMmapqUnique}
    ## Limits
    @LIMITS@

    ##outWig:
    @OUTWIG@

    ## Rename the selected features directory
    #if str($solo.soloFeatures) == 'Gene Velocyto'
    && mv Solo.out/Velocyto Solo.out/soloFeatures
    #else
    && mv Solo.out/${solo.soloFeatures} Solo.out/soloFeatures
    #end if

    ## put the barcodes and features stats into a single file
    && cat <(echo "Barcodes:") Solo.out/Barcodes.stats <(echo "Genes:") Solo.out/soloFeatures/Features.stats > '${output_stats}'

    #if "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None':
        ## recompress BAM output for smaller file size
        && samtools view -b -o '$output_BAM' Aligned.sortedByCoord.out.bam
    #else:
        ## BAM sorting (logic copied from samtools_sort wrapper)
        ## choosing BAM SortedByCoord appeared once to give fewer reads
        ## than BAM Unsorted followed by a samtools sort
        ## so better go with the latter?
        &&
        ##compute the number of ADDITIONAL threads to be used by samtools (-@)
        addthreads=\${GALAXY_SLOTS:-2} && (( addthreads-- )) &&
        ##compute the number of memory available to samtools sort (-m)
        ##use only 75% of available: https://github.com/samtools/samtools/issues/831
        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
        ((addmemory=addmemory*75/100)) &&
        samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
    #end if
    ##outWig:
    @OUTWIGOUTPUTS@
    ]]></command>
    <configfiles>
        <configfile name="manifest_file" >
## Smart-Seq manifest: one tab separated line per sample
## (read 1, read 2 or '-' for single end, 'ID:' + cell id).
#if str($sc.solo_type) == "SmartSeq":
    #set $cellids_fh = open(str($sc.cell_ids), 'r')
    ## Blank lines (e.g. a trailing newline) are not cell IDs; skipping them
    ## keeps the count check below from failing on otherwise valid files
    #set $cellids = [str(x.strip()) for x in $cellids_fh.readlines() if x.strip()]
    #silent $cellids_fh.close()
    #set $samples = []
    #if str($sc.input_types_smart_seq.use) == "list_single_end":
        ## There must be exactly one cell ID per collection element
        #assert len($cellids) == len($sc.input_types_smart_seq.single_end_collection.keys())
        #for $i,$r1 in enumerate($sc.input_types_smart_seq.single_end_collection):
            #silent $samples.append('\t'.join([str($r1), '-', 'ID:' + $cellids[$i]]))
        #end for
    #elif str($sc.input_types_smart_seq.use) == "list_paired_end":
        #assert len($cellids) == len($sc.input_types_smart_seq.paired_end_collection.keys())
        #for $i,($r1,$r2) in enumerate($sc.input_types_smart_seq.paired_end_collection):
            #silent $samples.append('\t'.join([str($r1), str($r2), 'ID:' + $cellids[$i]]))
        #end for
    #end if
    #echo '\n'.join($samples)
#end if
        </configfile>
    </configfiles>
    <inputs>
        <!-- Genome source. -->
        <conditional name="refGenomeSource">
            <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
                <option value="indexed" selected="true">Use a built-in index</option>
                <option value="history">Use reference genome from history and create temporary index</option>
            </param>
            <when value="indexed">
                <conditional name="GTFconditional">
                    <param name="GTFselect" type="select" label="Reference genome with annotation" help="Select the '...
with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions, and provide your own splice junction annotations.">
                        <option value="without-gtf-with-gtf" selected="true">use genome reference without builtin gene-model</option>
                        <option value="with-gtf">use genome reference with builtin gene-model</option>
                    </param>
                    <when value="with-gtf">
                        <expand macro="index_selection" with_gene_model="1" />
                    </when>
                    <when value="without-gtf-with-gtf">
                        <expand macro="index_selection" with_gene_model="0" />
                        <expand macro="SJDBOPTIONS"/>
                    </when>
                </conditional>
            </when>
            <when value="history">
                <expand macro="ref_selection" />
                <expand macro="SJDBOPTIONS"/>
            </when>
        </conditional>
        <!-- Library type; the selected value is passed to STAR as the soloType. -->
        <conditional name="sc" >
            <param name="solo_type" type="select" label="Type of single-cell RNA-seq" >
                <option value="CB_UMI_Simple">Drop-seq or 10X Chromium</option>
                <option value="CB_UMI_Complex">inDrop</option>
                <option value="SmartSeq">Smart-Seq</option>
            </param>
            <when value="CB_UMI_Simple">
                <expand macro="input_selection" />
                <param format="txt,tsv" argument="--soloCBwhitelist" optional="True" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
                <!-- Cv2 and Cv3 use barcode/UMI positions hard-coded in the
                     command template; only "custom" exposes them here. -->
                <conditional name="params" >
                    <param name="chemistry" type="select" label="Configure Chemistry Options">
                        <option value="Cv2" selected="true">Chromium chemistry v2</option>
                        <option value="Cv3">Chromium chemistry v3</option>
                        <option value="custom">Custom</option>
                    </param>
                    <when value="Cv2" />
                    <when value="Cv3" />
                    <when value="custom" >
                        <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" />
                        <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" />
                        <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" />
                        <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" />
                        <conditional name="bccdna_mate" >
                            <param name="bc_location" type="select" label="Barcode and cDNA on the same mate?" >
                                <option value="other_mate" selected="true">BC and cDNA are on different mates of paired-end read</option>
                                <option value="same_mate">BC and cDNA are on the same mate of paired-end read</option>
                            </param>
                            <when value="other_mate" />
                            <when value="same_mate" >
                                <param argument="--soloBarcodeMate" type="select" label="Barcode sequence is a part of">
                                    <option value="1" selected="true">mate 1</option>
                                    <option value="2">mate 2</option>
                                </param>
                                <param name="clip_n_bases" type="integer" value="39" label="Number of bases to clip (=CB+UMI+adapter)"/>
                            </when>
                        </conditional>
                        <expand macro="solo_adapter_params" />
                    </when>
                </conditional>
                <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence."
                />
                <!-- The UMI filtering choice is only offered (and only passed
                     to STAR) for the 1MM_CR deduplication method. -->
                <conditional name="umidedup">
                    <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
                        <expand macro="umidedup_options" />
                        <option value="Exact" >Exact</option>
                        <option value="1MM_CR" >CellRanger2-4 algorithm</option>
                    </param>
                    <when value="1MM_All"/>
                    <when value="1MM_Directional_UMItools"/>
                    <when value="1MM_Directional"/>
                    <when value="Exact"/>
                    <when value="1MM_CR">
                        <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" >
                            <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option>
                            <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene</option>
                            <option value="MultiGeneUMI_All" >Remove all UMIs that map to more than one gene</option>
                            <option value="MultiGeneUMI_CR" >Remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0</option>
                        </param>
                    </when>
                </conditional>
                <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed.
Allowed CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used to choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_multi, but pseudocounts of 1 are added to all whitelist barcodes.">
                    <expand macro="cb_match_wl_common" />
                    <expand macro="cb_match_wl_cellranger" />
                </param>
            </when>
            <when value="CB_UMI_Complex">
                <expand macro="input_selection" />
                <!-- Each entry contributes one whitelist file and one
                     start/end position string to the command line. At least
                     one entry is required: with none, the whitelist and
                     position options would be rendered without any value. -->
                <repeat name="cb_whitelists" title="Cell barcode whitelist information" min="1" max="2" >
                    <param name="whitelist_file" format="txt,tsv" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
                    <param name="cb_start_anchor" type="select" label="Start anchor base for cell barcode">
                        <expand macro="anchor_types" />
                    </param>
                    <param name="cb_start_anchor_pos" type="integer" value="0" label="0-based position of the CB start with respect to the anchor base" />
                    <param name="cb_end_anchor" type="select" label="End anchor base for cell barcode">
                        <expand macro="anchor_types" />
                    </param>
                    <param name="cb_end_anchor_pos" type="integer" value="0" label="0-based position of the CB end with respect to the anchor base" />
                </repeat>
                <!-- The four UMI values are joined with underscores into the
                     single UMI position argument. -->
                <param name="umi_start_anchor" type="select" label="Start anchor base for UMI">
                    <expand macro="anchor_types" />
                </param>
                <param name="umi_start_anchor_pos" type="integer" value="0" label="0-based position of the UMI start with respect to the anchor base" />
                <param name="umi_end_anchor" type="select" label="End anchor base for UMI">
                    <expand macro="anchor_types" />
                </param>
                <param name="umi_end_anchor_pos" type="integer" value="0" label="0-based position of the UMI end with respect to the anchor base" />
                <expand macro="solo_adapter_params" />
                <conditional name="umidedup">
                    <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
                        <expand macro="umidedup_options" />
                        <option value="Exact" >Exact</option>
                        <option value="1MM_CR" >CellRanger2-4 algorithm</option>
                    </param>
                    <when value="1MM_All"/>
                    <when value="1MM_Directional_UMItools"/>
                    <when value="1MM_Directional"/>
                    <when value="Exact"/>
                    <when value="1MM_CR">
                        <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" >
                            <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option>
                            <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene</option>
                            <option value="MultiGeneUMI_All" >Remove all UMIs that map to more than one gene</option>
                            <option value="MultiGeneUMI_CR" >Remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0</option>
                        </param>
                    </when>
                </conditional>
                <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used to choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_multi, but pseudocounts of 1 are added to all whitelist barcodes.">
                    <expand macro="cb_match_wl_common" />
                    <!-- should we add EditDist_2? -->
                </param>
            </when>
            <when value="SmartSeq">
                <expand macro="input_selection_smart_seq" />
                <param name="cell_ids" format="txt,tsv" type="data" label="File containing cell IDs of the samples.
One ID per line in order of samples in the above collection."/>
                <conditional name="umidedup">
                    <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
                        <expand macro="umidedup_options" />
                        <option value="Exact" >Exact</option>
                        <option value="NoDedup" >Do not deduplicate UMIs</option>
                    </param>
                    <when value="1MM_All"/>
                    <when value="1MM_Directional_UMItools"/>
                    <when value="1MM_Directional"/>
                    <when value="Exact"/>
                    <when value="NoDedup"/>
                </conditional>
            </when>
        </conditional>
        <!-- Settings shared by all library types. -->
        <section name="solo" title="Advanced Settings" expanded="true">
            <param argument="--soloStrand" type="select" label="Strandedness of Library" help="Unstranded has no strand information, Forward has the read strand the same as the original RNA molecule, Reverse has the read strand opposite to the original RNA molecule">
                <option value="Unstranded" >No strand information</option>
                <option value="Forward" selected="true" >Read strand same as the original RNA molecule</option>
                <option value="Reverse" >Read strand opposite to the original RNA molecule</option>
            </param>
            <!-- The selected value is also used by the command template as the
                 name of the Solo.out sub-directory to rename, and by the
                 output filters. -->
            <param argument="--soloFeatures" type="select" label="Collect UMI counts for these genomic features" >
                <option value="Gene" selected="true">Gene: Count reads matching the Gene Transcript</option>
                <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option>
                <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option>
                <option value="GeneFull_ExonOverIntron" >Full: Count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons</option>
                <option value="GeneFull_Ex50pAS" >Full: Count all reads overlapping genes' exons and introns: prioritize 50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.</option>
                <option value="Gene Velocyto">Velocyto: calculate spliced, unspliced, and ambiguous counts per cell per gene similar to the velocyto tool</option>
            </param>
            <expand macro="wasp"/>
            <!-- Every branch defines output_raw because the output filters
                 read it regardless of the filter type. -->
            <conditional name="filter" >
                <param name="filter_type" type="select" label="Cell filtering type and parameters" >
                    <option value="cellranger2" selected="true" >Simple filtering of CellRanger v2</option>
                    <option value="emptydrops" >EmptyDrops filtering in CellRanger flavor</option>
                    <option value="topcells" >Filter top N cells</option>
                    <option value="no_filter" >Do not filter</option>
                </param>
                <when value="cellranger2" >
                    <param name="n_expected" type="integer" min="1" value="3000" label="Number of expected cells" />
                    <param name="max_perc" type="float" min="0" max="1" value="0.99" label="Robust maximum percentile for UMI count" />
                    <param name="max_min_ratio" type="float" min="1" value="10" label="Maximum to minimum ratio for UMI count" />
                    <param name="output_raw" type="boolean" checked="false" label="Output raw matrix in addition to filtered one" />
                </when>
                <when value="emptydrops" >
                    <param name="nExpectedCells" type="integer" min="1" value="3000" label="Number of expected cells" />
                    <param name="maxPercentile" type="float" min="0" max="1" value="0.99" label="Robust maximum percentile for UMI count" />
                    <param name="maxMinRatio" type="float" min="1" value="10" label="Maximum to minimum ratio for UMI count" />
                    <param name="indMin" type="integer" value="45000" label="Minimum number of barcodes (used as partition parameter for ambient estimation)" />
                    <param name="indMax" type="integer" value="90000" label="Maximum number of barcodes (used as partition parameter for ambient estimation)" />
                    <param name="umiMin" type="integer" value="500" label="Consider at least these many UMIs per barcode after initial cell calling" />
                    <param name="umiMinFracMedian" type="float" value="0.01" label="Minimum UMI:median ratio after initial cell calling" />
                    <param name="candMaxN" type="integer" value="20000" label="Number of extra barcodes after initial cell calling" />
                    <param name="FDR" type="float" value="0.01" label="Maximum adjusted p-value for determining a barcode as non-ambient" />
                    <param name="simN" type="integer" value="10000" label="Number of log likelihood simulations" />
                    <param name="output_raw" type="boolean" checked="false" label="Output raw matrix in addition to filtered one" />
                </when>
                <when value="topcells" >
                    <param name="n_cells" type="integer" min="1" value="3000" label="Number of top cells to report sorted by UMI count" />
                    <param name="output_raw" type="boolean" checked="false" label="Output raw matrix in addition to filtered one" />
                </when>
                <when value="no_filter">
                    <!-- Without filtering only the raw matrix exists, so it is always output. -->
                    <param name="output_raw" type="hidden" value="true" />
                </when>
            </conditional>
            <param argument="--soloOutFormatFeaturesGeneField3" type="text" value="Gene Expression" label="Field 3 in the Genes output." help="Input '-' to remove the 3rd column from the output."
            />
            <!-- Selecting CB or UB makes the command template request a
                 coordinate-sorted BAM from STAR. -->
            <param argument="--outSAMattributes" type="select" display="checkboxes" multiple="true" optional="true" label="Read alignment tags to include in the BAM output">
                <expand macro="common_SAM_attributes"/>
                <option value="CR">CR Cellular barcode sequence bases (uncorrected)</option>
                <option value="CY">CY Phred quality of the cellular barcode sequence in the CR tag</option>
                <option value="UR">UR UMI (uncorrected)</option>
                <option value="UY">UY Phred quality of the UMI</option>
                <option value="GX">GX Gene ID</option>
                <option value="GN">GN Gene name</option>
                <option value="CB">CB Cell identifier (corrected)</option>
                <option value="UB">UB UMI (corrected)</option>
                <option value="sM">sM assessment of CB and UMI</option>
                <option value="sS">sS sequence of the entire barcode (CB,UMI,adapter...)</option>
                <option value="sQ">sQ quality of the entire barcode</option>
            </param>
            <param name="quantModeGene" type="boolean" truevalue="GeneCounts" falsevalue="" checked="false" label="Output global gene count" help="Can be used by MultiQC" />
            <param argument="--outSAMunmapped" type="boolean" truevalue="--outSAMunmapped Within" falsevalue="--outSAMunmapped None" checked="false" label="Output unmapped reads in the BAM" />
            <expand macro="outSAMmapqUnique"/>
            <expand macro="limits" />
        </section>
        <expand macro="outWig"/>
    </inputs>
    <outputs>
        <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
            <expand macro="dbKeyActions" />
        </data>
        <!-- <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" /> <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" /> <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" > <expand macro="dbKeyActions" /> </data> -->
        <!-- soloCellFilter set to None, if SJ is selected for soloFeatures -->
        <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes raw"
from_work_dir="Solo.out/soloFeatures/raw/features.tsv" >
            <filter>solo['filter']['output_raw'] or solo['soloFeatures'] == "SJ" </filter>
        </data>
        <!-- Feature and barcode lists: the raw variants are produced when the
             raw output is requested or for splice junctions, the filtered
             variants when a cell filter is active and the feature is not SJ. -->
        <data format="tsv" name="output_genes_filtered" label="${tool.name} on ${on_string}: Genes filtered" from_work_dir="Solo.out/soloFeatures/filtered/features.tsv" >
            <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter>
        </data>
        <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw" from_work_dir="Solo.out/soloFeatures/raw/barcodes.tsv" >
            <filter>solo['filter']['output_raw'] or solo['soloFeatures'] == "SJ" </filter>
        </data>
        <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered" from_work_dir="Solo.out/soloFeatures/filtered/barcodes.tsv" >
            <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter>
        </data>
        <!-- Count matrices: all of them are collected from the renamed
             Solo.out/soloFeatures directory; the filters select which
             dataset is created for the chosen feature type. -->
        <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts raw" from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
            <filter>solo['soloFeatures'] == "Gene" and solo['filter']['output_raw'] </filter>
            <expand macro="dbKeyActions" />
        </data>
        <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Gene Counts filtered" from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" >
            <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] != "no_filter" </filter>
            <expand macro="dbKeyActions" />
        </data>
        <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
            <filter>solo['soloFeatures'] == "SJ" </filter>
            <expand macro="dbKeyActions" />
        </data>
        <!-- The substring test matches every soloFeatures value that contains "GeneFull". -->
        <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts raw" from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
            <filter>"GeneFull" in solo['soloFeatures'] and solo['filter']['output_raw'] </filter>
            <expand macro="dbKeyActions" />
        </data>
        <data format="mtx" name="output_matrixGeneFull_filtered" label="${tool.name} on ${on_string}: Matrix Full Gene Counts filtered" from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" >
            <filter>"GeneFull" in solo['soloFeatures'] and solo['filter']['filter_type'] != "no_filter" </filter>
            <expand macro="dbKeyActions" />
        </data>
        <!-- Written directly by the samtools step of the command template. -->
        <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" >
            <expand macro="dbKeyActions" />
        </data>
        <!-- Concatenation of Barcodes.stats and Features.stats built by the command template. -->
        <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature Statistic Summaries"/>
        <data name="reads_per_gene" format="tabular" label="${tool.name} on ${on_string}: combined reads per gene" from_work_dir="ReadsPerGene.out.tab">
            <filter>solo['quantModeGene']</filter>
            <expand macro="outCountActions" />
        </data>
        <expand macro="outWigOutputs"/>
    </outputs>
    <!-- Generating test data that is big enough for STARsolo to detect and small enough for Galaxy to test requires careful modification of input FASTA and GTF data, where the length of FASTA cannot exceed the largest position in the GTF file, regardless of the FASTA starting sequence position.
A full writeup of how to subset single cell data for use in STARsolo is given here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d --> <tests> <test expect_num_outputs="7"> <!-- test 1 --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <conditional name="filter"> <param name="filter_type" value="no_filter" /> </conditional> <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> <param name="quantModeGene" value="true" /> <conditional name="wasp_conditional"> <param name="waspOutputMode" value="wasp_mode"/> <param name="varVCFfile" value="filtered3.vcf" ftype="vcf" /> </conditional> </section> <output name="output_barcodes" > <assert_contents> <!-- first and last line --> <has_line line="AAACCTGAGCGCTCCA" /> <has_line line="TTTGGTTAGTGGGCTA" /> <has_n_lines n="394" /> </assert_contents> </output> <output name="output_genes"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching 
expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrix" > <assert_contents> <has_line_matching expression="14\s+394\s+7" /> <has_line_matching expression="4\s+381\s+1" /> <has_n_lines n="10" /> </assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+noUnmapped\s+5823" /> <has_line_matching expression="\s+yesUMIs\s+8" /> </assert_contents> </output> <output name="output_BAM" value="filtered4.bam" ftype="bam" lines_diff="6"/> <output name="reads_per_gene" > <assert_contents> <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" /> <has_line_matching expression="ENSG00000275464\s+38\s+1\s+40" /> </assert_contents> <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> </output> </test> <test expect_num_outputs="6"> <!-- test 2 --> <!-- same as above, but using custom, no reads_per_gene and include unmapped reads--> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="custom" /> <param name="soloCBstart" value="1" /> <param name="soloCBlen" value="16" /> <param name="soloUMIstart" value="17" /> <param 
name="soloUMIlen" value="12" /> </conditional> </conditional> <section name="solo" > <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> <param name="outSAMunmapped" value="true" /> </section> <output name="output_barcodes_filtered" > <assert_contents> <!-- first and last line --> <has_line line="ACACCGGTCTAACGGT" /> <has_line line="TTCTCAATCCACGTTC" /> <has_n_lines n="7" /> </assert_contents> </output> <output name="output_genes_filtered"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrix_filtered" > <assert_contents> <has_line_matching expression="14\s+7\s+7" /> <has_line_matching expression="4\s+7\s+1" /> <has_n_lines n="10" /> </assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+noUnmapped\s+5823" /> <has_line_matching expression="\s+yesUMIs\s+8" /> </assert_contents> </output> <output name="output_BAM"> <assert_contents> <has_size value="884669" delta="80000" /> </assert_contents> </output> </test> <test expect_num_outputs="6"> <!-- test 3 --> <!-- Multiple repeats test --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" 
value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> </section> <output name="output_barcodes_filtered" > <assert_contents> <has_line line="ACACCGGTCTAACGGT" /> <has_line line="TTCTCAATCCACGTTC" /> <has_n_lines n="7" /> </assert_contents> </output> <!-- The BAM is only checked by approximate size (has_size with a delta) --> <output name="output_BAM" > <assert_contents> <has_size value="166147" delta="600" /> </assert_contents> </output> </test> <test expect_num_outputs="10"> <!-- test 4 --> <!-- Test with paired collection input (use=list_paired) and bedGraph signal output (outWigType=bedGraph) --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="list_paired" /> <param name="input_collection" > <collection type="paired"> <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </collection> </param> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <param 
name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> </section> <conditional name="outWig"> <param name="outWigType" value="bedGraph" /> </conditional> <output name="output_barcodes_filtered" > <assert_contents> <has_line line="ACACCGGTCTAACGGT" /> <has_line line="TTCTCAATCCACGTTC" /> <has_n_lines n="7" /> </assert_contents> </output> <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> <!-- bedGraph signal outputs (Unique and UniqueMultiple, str1 and str2) are compared against stored reference files --> <output name="signal_unique_str1" file="Signal.Unique.str1.out.bg" /> <output name="signal_uniquemultiple_str1" file="Signal.UniqueMultiple.str1.out.bg" /> <output name="signal_unique_str2" file="Signal.Unique.str2.out.bg" /> <output name="signal_uniquemultiple_str2" file="Signal.UniqueMultiple.str2.out.bg" /> </test> <test expect_num_outputs="9"> <!-- test 5 --> <!-- Test soloFeatures (GeneFull), soloCBmatchWLtype (1MM_multi_pseudocounts), soloCellFilter (topcells), soloOutFormatFeaturesGeneField3 ("Dummy Text") --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <param name="soloCBmatchWLtype" value="1MM_multi_pseudocounts" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <param name="soloStrand" value="Forward" /> 
<param name="soloFeatures" value="GeneFull" /> <conditional name="filter"> <param name="filter_type" value="topcells" /> <param name="n_cells" value="5" /> <param name="output_raw" value="true" /> </conditional> <param name="soloOutFormatFeaturesGeneField3" value="Dummy Text" /> </section> <output name="output_barcodes_filtered" > <assert_contents> <!-- first and last line --> <has_line line="AGACGTTCAAGGCTCC" /> <has_line line="TCAACGAAGCTAGTGG" /> <has_n_lines n="6" /> </assert_contents> </output> <output name="output_genes_filtered" > <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrixGeneFull_filtered" > <assert_contents> <has_line_matching expression="14\s+6\s+14" /> <has_line_matching expression="10\s+6\s+1" /> <has_n_lines n="17" /> </assert_contents> </output> <!-- The non-filtered outputs below are asserted in addition to the filtered ones; presumably they are produced because output_raw is true (confirm against the outputs section) --> <output name="output_barcodes" > <assert_contents> <!-- first and last line --> <has_line line="AAACCTGAGCGCTCCA" /> <has_line line="TTTGGTTAGTGGGCTA" /> <has_n_lines n="394" /> </assert_contents> </output> <output name="output_genes"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrix" > <assert_contents> <has_line_matching expression="14\s+394\s+195" /> <has_line_matching expression="3\s+1\s+1" /> <has_n_lines n="198" /> </assert_contents> </output> </test> <test expect_num_outputs="6"> <!-- test 6 --> <!-- Emptydrops filtering (filter_type=emptydrops with explicit values for its parameters) --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> 
<param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <!-- emptydrops cell filter with explicit values for its parameters --> <conditional name="filter"> <param name="filter_type" value="emptydrops" /> <param name="nExpectedCells" value="5" /> <param name="maxPercentile" value="0.99" /> <param name="maxMinRatio" value="10" /> <param name="indMin" value="45000" /> <param name="indMax" value="90000" /> <param name="umiMin" value="500" /> <param name="umiMinFracMedian" value="0.01" /> <param name="candMaxN" value="20000" /> <param name="FDR" value="0.01" /> <param name="simN" value="10000" /> </conditional> <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> </section> <output name="output_barcodes_filtered"> <assert_contents> <!-- first and last line --> <has_line line="ACACCGGTCTAACGGT" /> <has_line line="TTCTCAATCCACGTTC" /> <has_n_lines n="7" /> </assert_contents> </output> <output name="output_genes_filtered"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrix_filtered" > <assert_contents> <has_line_matching expression="14\s+7\s+7" /> <has_line_matching expression="4\s+7\s+1" /> <has_n_lines n="10" /> </assert_contents> </output> <output 
name="output_stats" > <assert_contents> <has_line_matching expression="\s+noUnmapped\s+5823" /> <has_line_matching expression="\s+yesUMIs\s+8" /> </assert_contents> </output> <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> </test> <test expect_num_outputs="6"> <!-- test 7 --> <!-- Test soloType CB_UMI_Complex: two cell-barcode whitelists with anchor-based positions, an anchor-based UMI position and an adapter sequence --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Complex" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="indrop.R1.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="indrop.R2.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <!-- first cell barcode: whitelist indrop.barcodes1.txt --> <repeat name="cb_whitelists" > <param name="whitelist_file" value="indrop.barcodes1.txt"/> <param name="cb_start_anchor" value="0" /> <param name="cb_start_anchor_pos" value="0" /> <param name="cb_end_anchor" value="2" /> <param name="cb_end_anchor_pos" value="-1" /> </repeat> <!-- second cell barcode: whitelist indrop.barcodes2.txt --> <repeat name="cb_whitelists" > <param name="whitelist_file" value="indrop.barcodes2.txt"/> <param name="cb_start_anchor" value="3" /> <param name="cb_start_anchor_pos" value="1" /> <param name="cb_end_anchor" value="3" /> <param name="cb_end_anchor_pos" value="8" /> </repeat> <param name="umi_start_anchor" value="3" /> <param name="umi_start_anchor_pos" value="9" /> <param name="umi_end_anchor" value="3" /> <param name="umi_end_anchor_pos" value="14" /> <param name="soloAdapterSequence" value="GAGTGATTGCTTGTGACGCCTT" /> <param name="soloAdapterMismatchesNmax" value="1" /> <param name="clipAdapterType" value="CellRanger4" /> <conditional name="umidedup"> <param name="soloUMIdedup" 
value="1MM_All" /> </conditional> <param name="soloCBmatchWLtype" value="1MM" /> </conditional> <output name="output_barcodes_filtered" > <assert_contents> <!-- first and last line --> <has_line line="ACAACGTGG_AAACCTCC" /> <has_line line="ATTCCAGAC_TTCGCTGG" /> <has_n_lines n="33" /> </assert_contents> </output> <output name="output_genes_filtered"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrix_filtered" > <assert_contents> <has_line_matching expression="14\s+33\s+36" /> <has_line_matching expression="2\s+33\s+1" /> <has_n_lines n="39" /> </assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+yesWLmatchExact\s+791" /> <has_line_matching expression="\s+yesUMIs\s+36" /> </assert_contents> </output> </test> <test expect_num_outputs="6"> <!-- test 8 --> <!-- Test soloType SmartSeq: nine paired-end samples (pair1 to pair9) supplied as a list:paired collection, plus a cell_ids file --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="SmartSeq" /> <conditional name="input_types_smart_seq"> <param name="use" value="list_paired_end" /> <param name="paired_end_collection" > <collection type="list:paired"> <element name="pair1"> <collection type="paired"> <element name="forward" value="smartseq1.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq1.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair2"> <collection type="paired"> <element name="forward" 
value="smartseq2.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq2.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair3"> <collection type="paired"> <element name="forward" value="smartseq3.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq3.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair4"> <collection type="paired"> <element name="forward" value="smartseq4.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq4.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair5"> <collection type="paired"> <element name="forward" value="smartseq5.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq5.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair6"> <collection type="paired"> <element name="forward" value="smartseq6.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq6.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair7"> <collection type="paired"> <element name="forward" value="smartseq7.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq7.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair8"> <collection type="paired"> <element name="forward" value="smartseq8.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq8.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> <element name="pair9"> <collection type="paired"> <element name="forward" value="smartseq9.R1.fastq.gz" ftype="fastqsanger.gz" /> <element name="reverse" value="smartseq9.R2.fastq.gz" ftype="fastqsanger.gz" /> </collection> </element> </collection> </param> </conditional> <!-- NOTE(review): cell_ids presumably supplies one cell identifier per collection element; confirm against the tool's inputs section --> <param name="cell_ids" value="smartseq.cellids.txt" /> <conditional name="umidedup"> <param name="soloUMIdedup" value="Exact" /> </conditional> 
</conditional> <section name="solo" > <param name="soloStrand" value="Unstranded" /> <conditional name="filter"> <param name="filter_type" value="topcells" /> <param name="n_cells" value="2" /> </conditional> </section> <output name="output_barcodes_filtered" > <assert_contents> <!-- CSC6_D02 must be kept by the topcells filter, MGH26_A02 must be absent --> <has_line line="CSC6_D02" /> <not_has_text text="MGH26_A02" /> <has_n_lines n="3" /> </assert_contents> </output> <output name="output_genes_filtered"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrix_filtered" > <assert_contents> <has_line_matching expression="14\s+3\s+10" /> <has_line_matching expression="12\s+3\s+1" /> <has_n_lines n="13" /> </assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+yesWLmatchExact\s+9000" /> <has_line_matching expression="\s+yesUMIs\s+32" /> </assert_contents> </output> </test> <test expect_num_outputs="6"> <!-- test 9 --> <!-- Test outSAMattributes (NH,HI,AS,nM,GX,GN,CB,UB) with filter_type=no_filter --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> 
<conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <conditional name="filter"> <param name="filter_type" value="no_filter" /> </conditional> <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> <param name="outSAMattributes" value="NH,HI,AS,nM,GX,GN,CB,UB" /> </section> <output name="output_barcodes" > <assert_contents> <!-- first and last line --> <has_line line="AAACCTGAGCGCTCCA" /> <has_line line="TTTGGTTAGTGGGCTA" /> <has_n_lines n="394" /> </assert_contents> </output> <output name="output_genes"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrix" > <assert_contents> <has_line_matching expression="14\s+394\s+7" /> <has_line_matching expression="4\s+381\s+1" /> <has_n_lines n="10" /> </assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+noUnmapped\s+5823" /> <has_line_matching expression="\s+yesUMIs\s+8" /> </assert_contents> </output> <!-- The BAM is only checked by approximate size (has_size with a delta) --> <output name="output_BAM" > <assert_contents> <has_size value="153108" delta="600" /> </assert_contents> </output> </test> <test expect_num_outputs="6"> <!-- test 10 --> <!-- Test soloFeatures (GeneFull_ExonOverIntron) together with soloUMIdedup=1MM_CR, soloUMIfiltering=MultiGeneUMI and filter_type=no_filter --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" 
value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <param name="soloCBmatchWLtype" value="1MM_multi_pseudocounts" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_CR" /> <param name="soloUMIfiltering" value="MultiGeneUMI" /> </conditional> </conditional> <section name="solo" > <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="GeneFull_ExonOverIntron" /> <conditional name="filter"> <param name="filter_type" value="no_filter" /> </conditional> <param name="soloOutFormatFeaturesGeneField3" value="Dummy Text" /> </section> <output name="output_barcodes" > <assert_contents> <!-- first and last line --> <has_line line="AAACCTGAGCGCTCCA" /> <has_line line="TTTGGTTAGTGGGCTA" /> <has_n_lines n="394" /> </assert_contents> </output> <output name="output_genes" > <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_matrixGeneFull" > <assert_contents> <has_line_matching expression="14\s+394\s+104" /> <has_line_matching expression="10\s+2\s+1" /> <has_n_lines n="107" /> </assert_contents> </output> </test> <test expect_num_outputs="7"> <!-- test 11 --> <!-- Indexed reference test: geneSource=indexed (genomeDir "000") with GTFselect=without-gtf-with-gtf, so the GTF (test1.gtf) and sjdbOverhang are passed as test parameters; quantModeGene is enabled and reads_per_gene is asserted --> <conditional name="refGenomeSource"> <param name="geneSource" value="indexed" /> <conditional name="GTFconditional"> <param name="GTFselect" value="without-gtf-with-gtf" /> <param name="genomeDir" value="000" /> <param name="sjdbOverhang" value="75"/> <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/> </conditional> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional 
name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <conditional name="filter"> <param name="filter_type" value="no_filter" /> </conditional> <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> <param name="quantModeGene" value="true" /> </section> <output name="output_barcodes" > <assert_contents> <!-- first and last line --> <has_line line="AAACCTGAGCGCTCCA" /> <has_line line="TTTGGTTAGTGGGCTA" /> <has_n_lines n="394" /> </assert_contents> </output> <!-- test1.gtf yields a single feature line (GENE1) --> <output name="output_genes"> <assert_contents> <has_line_matching expression="GENE1\s+GENE1\s+Gene\s+Expression" /> <has_n_lines n="1" /> </assert_contents> </output> <output name="output_matrix" > <assert_contents> <has_line_matching expression="1\s+394\s+31" /> <has_line_matching expression="1\s+2\s+1" /> <has_n_lines n="34" /> </assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+noUnmapped\s+6335" /> <has_line_matching expression="\s+yesUMIs\s+33" /> </assert_contents> </output> <output name="output_BAM"> <assert_contents> <has_size value="7133" delta="1000"/> </assert_contents> </output> <output name="reads_per_gene" > <assert_contents> <has_line_matching expression="GENE1\s+41\s+41\s+0" /> </assert_contents> </output> </test> <test expect_num_outputs="6"> <!-- test 12 --> <!-- Test soloFeatures "Gene Velocyto" with quantModeGene enabled and filter_type=no_filter --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param 
name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <conditional name="filter"> <param name="filter_type" value="no_filter" /> </conditional> <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene Velocyto" /> <param name="quantModeGene" value="true" /> </section> <!-- Only barcodes, genes, stats, BAM and reads_per_gene are asserted for this test; no matrix output is checked --> <output name="output_barcodes" > <assert_contents> <!-- first and last line --> <has_line line="AAACCTGAGCGCTCCA" /> <has_line line="TTTGGTTAGTGGGCTA" /> <has_n_lines n="394" /> </assert_contents> </output> <output name="output_genes"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> </assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+noUnmapped\s+0" /> <has_line_matching expression="\s+yesUMIs\s+36" /> </assert_contents> </output> <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> <output name="reads_per_gene" > <assert_contents> <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" /> <has_line_matching expression="ENSG00000275464\s+38\s+1\s+40" /> </assert_contents> <metadata 
name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> </output> </test> <test expect_num_outputs="6"> <!-- test 13 --> <!-- Test a custom exon feature name: the GTF (fakexon.Homo_sapiens.GRCh38.100.chr21.gtf) is used with sjdbGTFfeatureExon=fakexon. If the splice feature is not set correctly, "Fatal INPUT FILE error, no exon lines in the GTF file" is expected. The asserted outputs are the same as in the preceding "Gene Velocyto" test. --> <conditional name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> <param name="genomeSAindexNbases" value="4" /> <param name="sjdbOverhang" value="100" /> <param name="sjdbGTFfile" value="fakexon.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/> <param name="sjdbGTFfeatureExon" value="fakexon"/> </conditional> <conditional name="sc" > <param name="solo_type" value="CB_UMI_Simple" /> <conditional name="input_types"> <param name="use" value="repeat" /> <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> </conditional> <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> <conditional name="params"> <param name="chemistry" value="Cv3" /> </conditional> <conditional name="umidedup"> <param name="soloUMIdedup" value="1MM_All" /> </conditional> </conditional> <section name="solo" > <conditional name="filter"> <param name="filter_type" value="no_filter" /> </conditional> <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene Velocyto" /> <param name="quantModeGene" value="true" /> </section> <output name="output_barcodes" > <assert_contents> <!-- first and last line --> <has_line line="AAACCTGAGCGCTCCA" /> <has_line line="TTTGGTTAGTGGGCTA" /> <has_n_lines n="394" /> </assert_contents> </output> <output name="output_genes"> <assert_contents> <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" /> <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" /> <has_n_lines n="14" /> 
</assert_contents> </output> <output name="output_stats" > <assert_contents> <has_line_matching expression="\s+noUnmapped\s+0" /> <has_line_matching expression="\s+yesUMIs\s+36" /> </assert_contents> </output> <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> <output name="reads_per_gene" > <assert_contents> <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" /> <has_line_matching expression="ENSG00000275464\s+38\s+1\s+40" /> </assert_contents> <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> </output> </test> </tests> <help><![CDATA[
**What it does**

**STARsolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into the STAR_ code. STARsolo takes raw FASTQ read files as input and performs the following operations:

* Error correction and demultiplexing of cell barcodes using a user-supplied whitelist
* Mapping the reads to the reference genome using the standard STAR spliced read alignment algorithm
* Error correction and collapsing (deduplication) of Unique Molecular Identifiers (UMIs)
* Quantification of per-cell gene expression by counting the number of reads per gene

STARsolo output is designed to be a drop-in replacement for 10X CellRanger gene quantification output. It follows CellRanger logic for cell barcode whitelisting and UMI deduplication, and produces nearly identical gene counts in the same format. At the same time, STARsolo is approximately 10 times faster than CellRanger.

.. _STAR: https://github.com/alexdobin/STAR
]]></help> <expand macro="citations"/> </tool>