Repository 'rgrnastar'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar

Changeset 30:4014de1b6daf (2024-08-27)
Previous changeset 29:b0f2be869d6d (2024-02-14)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 2b3fa63863a366beef057c7f75ccbcaf0c280151
modified:
macros.xml
rg_rnaStar.xml
test-data/rnastar_test.log
added:
test-data/filtered3.vcf
test-data/filtered4.bam
test-data/rnastar_test_splicejunctions_wasp.bed
b
diff -r b0f2be869d6d -r 4014de1b6daf macros.xml
--- a/macros.xml Wed Feb 14 09:03:31 2024 +0000
+++ b/macros.xml Tue Aug 27 14:11:16 2024 +0000
[
b'@@ -5,7 +5,7 @@\n     the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ -->\n     <!-- STAR version to be used -->\n     <token name="@TOOL_VERSION@">2.7.11a</token>\n-    <token name="@VERSION_SUFFIX@">0</token>\n+    <token name="@VERSION_SUFFIX@">1</token>\n     <token name="@PROFILE@">21.01</token>\n     <!-- STAR index version compatible with this version of STAR\n     This is the STAR version that introduced the index structure expected\n@@ -17,16 +17,14 @@\n     <token name="@IDX_VERSION@">2.7.4a</token>\n     <token name="@IDX_VERSION_SUFFIX@">2</token>\n     <token name="@IDX_DATA_TABLE@">rnastar_index2x_versioned</token>\n-\n     <xml name="requirements">\n         <requirements>\n             <requirement type="package" version="@TOOL_VERSION@">star</requirement>\n             <requirement type="package" version="1.18">samtools</requirement>\n             <requirement type="package" version="1.13">gzip</requirement>\n-            <yield />\n+            <yield/>\n         </requirements>\n     </xml>\n-\n     <xml name="edam">\n         <edam_topics>\n             <edam_topic>topic_3170</edam_topic>\n@@ -36,20 +34,16 @@\n             <edam_operation>operation_0292</edam_operation>\n         </edam_operations>\n     </xml>\n-\n     <xml name="index_selection" token_with_gene_model="0">\n-        <param argument="--genomeDir" type="select"\n-        label="Select reference genome"\n-        help="If your genome of interest is not listed, contact the Galaxy team">\n+        <param argument="--genomeDir" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team">\n             <options from_data_table="@IDX_DATA_TABLE@">\n-                <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" />\n-                <filter type="static_value" column="5" value="@IDX_VERSION@" />\n-                <filter type="sort_by" column="2" />\n-                <validator 
type="no_options" message="No indexes are available for the selected input dataset" />\n+                <filter type="static_value" column="4" value="@WITH_GENE_MODEL@"/>\n+                <filter type="static_value" column="5" value="@IDX_VERSION@"/>\n+                <filter type="sort_by" column="2"/>\n+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>\n             </options>\n         </param>\n     </xml>\n-\n     <token name="@FASTQ_GZ_OPTION@">\n         --readFilesCommand zcat\n     </token>\n@@ -59,9 +53,9 @@\n         </citations>\n     </xml>\n     <xml name="SJDBOPTIONS">\n-         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/>\n-         <param argument="--sjdbGTFfeatureExon" type="text" value="exon" label="Elements to use from the gene model to use for splice junctions" help="By default and for almost all cases: \'exon\', referring to finding junctions at the RNA splice sites. This can optionally be changed to allow splicing at other levels, such as \'gene\', \'transcript\', \'CDS\'."/>\n-         <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>\n+        <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/>\n+        <param argument="--sjdbGTFfeatureExon" type="text" value="exon" label="Elements to use from the gene model to use for splice junctions" help="By default and for almost all cases: \'exon\', referring to finding junctions at the RNA splice sites. 
This can optionally be changed to allow splicing at other levels, such as \'gene\', \'transcript\', \'CDS\'."/>\n+        <param argument="--sjdbOverhang" type="inte'..b'           <option value="-">No per gene or transcript output as no GTF was provided</option>\n             </param>\n-            <when value="-" />\n+            <when value="-"/>\n         </conditional>\n     </xml>\n     <xml name="outSAMmapqUnique">\n         <!-- MAPQ 255 is the default in STAR (coming from tophat behaviour and compatibility for Cufflinks) but it is a problematic value\n         - according to SAM/BAM specs it means "undefined".\n         - Using 255 as the max mapq causes problem with modern downstream tools like mutect2: https://sites.duke.edu/workblog/2021/08/18/star-rnaseq-gatk-mutect2/ and 60 has become an inofficial replacement for 255. -->\n-        <param argument="--outSAMmapqUnique" type="integer" value="60" min="0" max="255"\n-        label="MAPQ value for unique mappers"\n-        help="STAR bases the mapping quality scores of alignment records in its BAM output on the number of alternative mappings for the read. If a read maps to multiple locations on the reference genome, the following MAPQ scoring scheme is\n-used: >=5 mappings => MAPQ=0; 3-4 mappings => MAPQ=1; 2 mappings => MAPQ=3. This setting lets you control the MAPQ used for reads mapped to a single location. Set to 255 for compatibility with Cufflink (default in STAR) but keep to 60 for modern downstream tools like mutect2." />\n+        <param argument="--outSAMmapqUnique" type="integer" value="60" min="0" max="255" label="MAPQ value for unique mappers" help="STAR bases the mapping quality scores of alignment records in its BAM output on the number of alternative mappings for the read. If a read maps to multiple locations on the reference genome, the following MAPQ scoring scheme is used: &gt;=5 mappings =&gt; MAPQ=0; 3-4 mappings =&gt; MAPQ=1; 2 mappings =&gt; MAPQ=3. 
This setting lets you control the MAPQ used for reads mapped to a single location. Set to 255 for compatibility with Cufflinks (default in STAR) but keep to 60 for modern downstream tools like mutect2."/>\n+    </xml>\n+    <xml name="wasp">\n+        <!--\n+            This is a re-implementation of the original WASP algorithm by Bryce van de Geijn, Graham McVicker,\n+            Yoav Gilad and Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12,\n+            1061\xe2\x80\x931063 (2015) https://www.nature.com/articles/nmeth.3582. WASP filtering is activated\n+            with "waspOutputMode SAMtag".\n+            -->\n+        <conditional name="wasp_conditional">\n+            <param argument="--waspOutputMode" type="select" label="Activate WASP filtering">\n+                <help><![CDATA[This is a reimplementation of the original WASP algorithm by Bryce van de Geijn, Graham McVicker,\n+                    Yoav Gilad and Jonathan K Pritchard. https://doi.org/10.1038/nmeth.3582. This option will add the vW tag to the SAM output. 
vW:i:1 means\n+                    alignment passed WASP filtering, and all other values mean it did not:<br/>\n+                    - vW:i:2 = multi-mapping read<br/>\n+                    - vW:i:3 = variant base in the read is N (non-ACGT)<br/>\n+                    - vW:i:4 = remapped read did not map <br/>\n+                    - vW:i:5 = remapped read multi-maps <br/>\n+                    - vW:i:6 = remapped read maps to a different locus <br/>\n+                    - vW:i:7 = read overlaps too many variants <br/>\n+                    ]]>\n+                </help>\n+                <option value="" selected="true">No WASP filtering</option>\n+                <option value="wasp_mode">Activate WASP filtering</option>\n+            </param>\n+            <when value="wasp_mode">\n+                <param argument="--varVCFfile" type="data" format="vcf" label="VCF file with personal variants" help="Each variant is expected to have a genotype with two alleles. The VCF file needs to have the 10th column with genotype recorded as 0/1, 1/0, 1/1 (or | instead of /)"/>\n+            </when>\n+            <when value=""/>\n+        </conditional>\n     </xml>\n </macros>\n'
b
diff -r b0f2be869d6d -r 4014de1b6daf rg_rnaStar.xml
--- a/rg_rnaStar.xml Wed Feb 14 09:03:31 2024 +0000
+++ b/rg_rnaStar.xml Tue Aug 27 14:11:16 2024 +0000
b
@@ -47,9 +47,11 @@
 
         ## Two pass mode
         --twopassMode ${twopass.twopassMode} ${twopass.twopass_read_subset}
-        #for $sj_input in $twopass.sj_precalculated:
-            '$sj_input'
-        #end for
+        ## Emit every pre-calculated splice junction file as its own single-quoted argument.
+        ## The whole loop is skipped when the parameter renders as an empty or whitespace-only string.
+        #if str($twopass.sj_precalculated).strip():
+            #for $sj_input in $twopass.sj_precalculated:
+                '$sj_input'
+            #end for
+        #end if
         #if str($twopass.twopassMode) != 'None':
             #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
                 ## need to check first if its a cached index or from history
@@ -215,7 +217,7 @@
             #end if
 
             ## Limits
-                @LIMITS@
+            @LIMITS@
         #else:
             ## Go with STAR's default algorithmic settings,
             ## but we need to provide a reasonable default
@@ -232,12 +234,16 @@
                 #end if
             #end if
         #end if
-
         --outBAMsortingThreadN \${GALAXY_SLOTS:-4}
         --outBAMsortingBinsN $perf.outBAMsortingBinsN
         --winAnchorMultimapNmax $perf.winAnchorMultimapNmax
         --limitBAMsortRAM \$((\${GALAXY_MEMORY_MB:-0}*1000000))
 
+        ## WASP filtering: pass the VCF of personal variants and have STAR add the vW tag to the SAM output.
+        ## Compare via str() like the other conditionals in this template, so the test does not
+        ## depend on how the parameter wrapper implements equality.
+        #if str($oformat.wasp_conditional.waspOutputMode) == 'wasp_mode':
+            --waspOutputMode SAMtag
+            --varVCFfile '$oformat.wasp_conditional.varVCFfile'
+        #end if
+
         ## Handle chimeric options and output
         #if str($chimOutType):
             --chimOutType $chimOutType
@@ -408,6 +414,7 @@
             primary?"/> -->
             <param name="outSAMprimaryFlag" type="hidden" value="OneBestScore" />
             <expand macro="outSAMmapqUnique"/>
+            <expand macro="wasp"/>
         </section>
         <section name="filter" title="Output filter criteria" expanded="true">
             <param name="basic_filters" type="select" display="checkboxes" multiple="true" optional="true"
@@ -565,20 +572,24 @@
         </data>
         <expand macro="outWigOutputs"/>
     </outputs>
-
     <tests>
         <test expect_num_outputs="3">
             <conditional name="singlePaired">
-                <param name="sPaired" value="single" />
-                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger" />
+                <param name="sPaired" value="paired" />
+                <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
             </conditional>
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
-                <param name="genomeFastaFiles" value="tophat_test.fa.gz" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
                 <param name="genomeSAindexNbases" value="5" />
             </conditional>
             <section name="oformat">
                 <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch" />
+                <!-- Select the wasp_mode branch and supply filtered3.vcf as its variant file -->
+                <conditional name="wasp_conditional">
+                    <param name="waspOutputMode" value="wasp_mode"/>
+                    <param name="varVCFfile" value="filtered3.vcf" ftype="vcf" />
+                </conditional>
             </section>
             <section name="algo">
                 <conditional name="params">
@@ -586,8 +597,7 @@
                 </conditional>
             </section>
             <output name="output_log" file="rnastar_test.log" compare="re_match_multiline" />
-            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
-            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634" />
+            <output name="splice_junctions" file="rnastar_test_splicejunctions_wasp.bed"/>
         </test>
         <!-- test with cached genome index -->
         <test expect_num_outputs="3">
b
diff -r b0f2be869d6d -r 4014de1b6daf test-data/filtered3.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered3.vcf Tue Aug 27 14:11:16 2024 +0000
b
b'@@ -0,0 +1,175 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##bcftoolsVersion=1.15.1+htslib-1.15.1\n+##bcftoolsCommand=mpileup -f filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa filtered3.bam\n+##reference=file://filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa\n+##contig=<ID=21,length=697740>\n+##ALT=<ID=*,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of raw reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of raw reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPBZ,Number=1,Type=Float,Description="Mann-Whitney U-z test of Read Position Bias (closer to 0 is better)">\n+##INFO=<ID=MQBZ,Number=1,Type=Float,Description="Mann-Whitney U-z test of Mapping Quality Bias (closer to 0 is better)">\n+##INFO=<ID=BQBZ,Number=1,Type=Float,Description="Mann-Whitney U-z test of Base Quality Bias (closer to 0 is better)">\n+##INFO=<ID=MQSBZ,Number=1,Type=Float,Description="Mann-Whitney U-z test of Mapping Quality vs Strand Bias (closer to 0 is better)">\n+##INFO=<ID=SCBZ,Number=1,Type=Float,Description="Mann-Whitney U-z test of Soft-Clip Length Bias (closer to 0 is better)">\n+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher\'s exact test to detect strand bias">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype 
likelihoods">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality">\n+##bcftools_callVersion=1.15.1+htslib-1.15.1\n+##bcftools_callCommand=call -mv -Ob -o filtered3.bcf; Date=Tue Aug 27 14:09:53 2024\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tfiltered3.bam\n+21\t13871\t.\tG\tA\t8.19139\t.\tDP=16;VDB=0.163924;SGB=-0.651104;RPBZ=-1.26304;MQBZ=1.7272;BQBZ=1;SCBZ=-0.447396;FS=0;MQ0F=0.0625;AC=1;AN=2;DP4=8,0,8,0;MQ=9\tGT:PL\t0/1:38,0,6\n+21\t13918\t.\tA\tG\t8.05611\t.\tDP=9;VDB=0.12166;SGB=-0.616816;RPBZ=1.03713;MQBZ=-0.154303;BQBZ=0;SCBZ=0.707107;FS=0;MQ0F=0.111111;AC=1;AN=2;DP4=3,0,6,0;MQ=14\tGT:PL\t0/1:40,0,15\n+21\t13924\t.\tG\tA\t27.0767\t.\tDP=11;VDB=0.0108096;SGB=-0.651104;RPBZ=-0.261911;MQBZ=0.968246;BQBZ=-0.745356;SCBZ=-0.931695;FS=0;MQ0F=0.0909091;AC=2;AN=2;DP4=2,0,8,0;MQ=14\tGT:PL\t1/1:54,3,0\n+21\t13928\t.\tG\tC\t34.42\t.\tDP=11;VDB=0.0257451;SGB=-0.670168;FS=0;MQ0F=0.0909091;AC=2;AN=2;DP4=0,0,10,0;MQ=14\tGT:PL\t1/1:64,24,0\n+21\t13962\t.\tA\tG\t3.21856\t.\tDP=13;VDB=0.00595258;SGB=-0.616816;RPBZ=2.14581;MQBZ=0.245201;BQBZ=-0.113961;SCBZ=-1.73888;FS=0;MQ0F=0.0769231;AC=1;AN=2;DP4=7,0,6,0;MQ=12\tGT:PL\t0/1:33,0,34\n+21\t15349\t.\tG\tC\t3.74242\t.\tDP=33;VDB=3.28209e-07;SGB=-0.689466;RPBZ=-1.0584;MQBZ=1.31911;MQSBZ=-1.01601;BQBZ=-1.40181;SCBZ=0.601646;FS=0;MQ0F=0.0606061;AC=1;AN=2;DP4=1,15,1,15;MQ=13\tGT:PL\t0/1:34,0,49\n+21\t15425\t.\tT\tC\t15.3182\t.\tDP=21;VDB=0.739317;SGB=-0.686358;RPBZ=-1.79168;MQBZ=0.125524;BQBZ=1.41022;SCBZ=0.514874;FS=0;MQ0F=0;AC=1;AN=2;DP4=0,7,0,14;MQ=12\tGT:PL\t0/1:47,0,9\n+21\t15
431\t.\tG\tA\t6.81655\t.\tDP=18;VDB=0.777008;SGB=-0.683931;RPBZ=-0.93741;MQBZ=-1.01859;BQBZ=1.61245;SCBZ=-1.53815;FS=0;MQ0F=0;AC=1;AN=2;DP4=0,5,0,13;MQ=10\tGT:PL\t0/1:38,0,11\n+21\t19605\t.\tA\t'..b';FS=0;MQ0F=0;AC=2;AN=2;DP4=4,0,5,0;MQ=11\tGT:PL\t1/1:37,3,0\n+21\t152547\t.\tA\tG\t8.54666\t.\tDP=8;VDB=0.00477428;SGB=-0.636426;RPBZ=0.223607;MQBZ=-0.774597;BQBZ=-0.377964;SCBZ=0.377964;FS=0;MQ0F=0;AC=2;AN=2;DP4=1,0,7,0;MQ=12\tGT:PL\t1/1:35,4,0\n+21\t221789\t.\tT\tC\t19.4636\t.\tDP=5;VDB=0.300397;SGB=-0.590765;FS=0;MQ0F=0.2;AC=2;AN=2;DP4=0,0,5,0;MQ=12\tGT:PL\t1/1:49,15,0\n+21\t221824\t.\tA\tG\t17.2912\t.\tDP=5;VDB=0.446842;SGB=-0.556411;RPBZ=0;MQBZ=0.790569;BQBZ=0;SCBZ=0;FS=0;MQ0F=0.2;AC=2;AN=2;DP4=1,0,4,0;MQ=12\tGT:PL\t1/1:44,8,0\n+21\t236291\t.\tA\tG\t3.21958\t.\tDP=7;VDB=0.535497;SGB=-0.556411;RPBZ=0.707107;MQBZ=-0.866025;BQBZ=0;SCBZ=-0.585142;FS=0;MQ0F=0;AC=1;AN=2;DP4=0,3,0,4;MQ=17\tGT:PL\t0/1:33,0,31\n+21\t236294\t.\tC\tT\t4.32521\t.\tDP=6;VDB=0.176409;SGB=-0.556411;RPBZ=1.38873;MQBZ=-0.707107;BQBZ=0;SCBZ=0.491869;FS=0;MQ0F=0;AC=1;AN=2;DP4=0,2,0,4;MQ=16\tGT:PL\t0/1:35,0,19\n+21\t236295\t.\tA\tG\t4.32521\t.\tDP=6;VDB=0.522837;SGB=-0.556411;RPBZ=0;MQBZ=-0.707107;BQBZ=0;SCBZ=-1.47561;FS=0;MQ0F=0;AC=1;AN=2;DP4=0,2,0,4;MQ=16\tGT:PL\t0/1:35,0,19\n+21\t236320\t.\tG\tA\t3.20067\t.\tDP=8;VDB=0.220638;SGB=-0.556411;RPBZ=1.76777;MQBZ=0;BQBZ=-0.866025;SCBZ=0.866025;FS=0;MQ0F=0;AC=1;AN=2;DP4=0,3,0,4;MQ=14\tGT:PL\t0/1:33,0,20\n+21\t236327\t.\tA\tG\t6.77159\t.\tDP=6;VDB=0.673544;SGB=-0.590765;RPBZ=-0.29277;MQBZ=-0.69282;BQBZ=-0.447214;SCBZ=0.69282;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,1,0,5;MQ=14\tGT:PL\t1/1:34,0,2\n+21\t236359\t.\tA\tT\t8.99921\t.\tDP=4;VDB=0.160409;SGB=-0.556411;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,4;MQ=11\tGT:PL\t1/1:38,12,0\n+21\t248947\t.\tT\tC\t4.85359\t.\tDP=4;VDB=0.38;SGB=-0.453602;RPBZ=-0.774597;MQBZ=1.73205;MQSBZ=-1;BQBZ=0;SCBZ=1;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,2,1,1;MQ=11\tGT:PL\t1/1:30,2,0\n+21\t248966\t.\tG\tA\t12.6566\t.\tDP=4;VDB=0.112085;SGB=-0.556411;MQSBZ=-1;FS=0;
MQ0F=0;AC=2;AN=2;DP4=0,0,1,3;MQ=11\tGT:PL\t1/1:42,12,0\n+21\t251560\t.\tC\tT\t11.7172\t.\tDP=3;VDB=0.137328;SGB=-0.511536;MQSBZ=0.707107;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,2,1;MQ=14\tGT:PL\t1/1:41,9,0\n+21\t289446\t.\tT\tC\t49.4146\t.\tDP=22;VDB=1.4824e-07;SGB=-0.692562;FS=0;MQ0F=0.0454545;AC=2;AN=2;DP4=0,0,22,0;MQ=12\tGT:PL\t1/1:79,66,0\n+21\t289481\t.\tT\tC\t3.73429\t.\tDP=48;VDB=0.22406;SGB=-0.692976;RPBZ=0.341551;MQBZ=-1.11595;BQBZ=-1.2128;SCBZ=-1.70538;FS=0;MQ0F=0.0208333;AC=1;AN=2;DP4=22,0,26,0;MQ=11\tGT:PL\t0/1:34,0,44\n+21\t289488\t.\tT\tC\t6.50824\t.\tDP=50;VDB=0.518994;SGB=-0.693021;RPBZ=0.545171;MQBZ=-1.485;BQBZ=2.489;SCBZ=-1.74235;FS=0;MQ0F=0.04;AC=1;AN=2;DP4=23,0,27,0;MQ=11\tGT:PL\t0/1:38,0,40\n+21\t289520\t.\tA\tC\t21.7432\t.\tDP=41;VDB=4.57336e-05;SGB=-0.69312;RPBZ=-0.393988;MQBZ=0.311388;BQBZ=2.69313;SCBZ=1.10119;FS=0;MQ0F=0.0243902;AC=2;AN=2;DP4=9,0,32,0;MQ=10\tGT:PL\t1/1:49,21,0\n+21\t289647\t.\tG\tA\t3.22451\t.\tDP=3;VDB=0.873285;SGB=-0.511536;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,3,0;MQ=14\tGT:PL\t1/1:30,9,0\n+21\t339923\t.\tC\tT\t7.30814\t.\tDP=2;VDB=0.56;SGB=-0.453602;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,2,0;MQ=20\tGT:PL\t1/1:36,6,0\n+21\t355659\t.\tC\tT\t6.32811\t.\tDP=4;VDB=0.868052;SGB=-0.511536;RPBZ=-1.34164;MQBZ=1;BQBZ=0;SCBZ=0;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,1,0,3;MQ=11\tGT:PL\t1/1:32,5,0\n+21\t553515\t.\tC\tG\t7.30814\t.\tDP=2;VDB=0.68;SGB=-0.453602;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,2,0;MQ=20\tGT:PL\t1/1:36,6,0\n+21\t602273\t.\tA\tC\t6.32811\t.\tDP=4;VDB=0.131948;SGB=-0.511536;RPBZ=-1.34164;MQBZ=1;BQBZ=-0.57735;SCBZ=-1.73205;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,1,0,3;MQ=11\tGT:PL\t1/1:32,5,0\n+21\t682407\t.\tG\tT\t7.30814\t.\tDP=2;VDB=0.56;SGB=-0.453602;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,2;MQ=20\tGT:PL\t1/1:36,6,0\n+21\t682421\t.\tT\tG\t7.30814\t.\tDP=2;VDB=0.56;SGB=-0.453602;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,2;MQ=20\tGT:PL\t1/1:36,6,0\n+21\t682423\t.\tC\tG\t7.30814\t.\tDP=2;VDB=0.56;SGB=-0.453602;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,2;MQ=20\tGT:PL\t1/1:36,6,0\n+21\t682426\t.\tG\tA\t
7.30814\t.\tDP=2;VDB=0.56;SGB=-0.453602;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,2;MQ=20\tGT:PL\t1/1:36,6,0\n+21\t682568\t.\tA\tG\t8.13869\t.\tDP=5;VDB=0.133161;SGB=-0.590765;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,5;MQ=9\tGT:PL\t1/1:37,15,0\n+21\t694476\t.\tC\tG\t18.4764\t.\tDP=6;VDB=0.0341101;SGB=-0.616816;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,6,0;MQ=11\tGT:PL\t1/1:48,18,0\n+21\t694482\t.\tT\tC\t19.4636\t.\tDP=4;VDB=0.131345;SGB=-0.556411;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,4,0;MQ=15\tGT:PL\t1/1:49,12,0\n'
b
diff -r b0f2be869d6d -r 4014de1b6daf test-data/filtered4.bam
b
Binary file test-data/filtered4.bam has changed
b
diff -r b0f2be869d6d -r 4014de1b6daf test-data/rnastar_test.log
--- a/test-data/rnastar_test.log Wed Feb 14 09:03:31 2024 +0000
+++ b/test-data/rnastar_test.log Tue Aug 27 14:11:16 2024 +0000
b
@@ -3,32 +3,35 @@
                                     Finished on | .*
        Mapping speed, Million of reads per hour | .*
 
-                          Number of input reads | 100
-                      Average input read length | 75
+                          Number of input reads | 15447
+                      Average input read length | 119
                                     UNIQUE READS:
-                   Uniquely mapped reads number | 99
-                        Uniquely mapped reads % | 99.00%
-                          Average mapped length | 74.65
-                       Number of splices: Total | 52
+                   Uniquely mapped reads number | 150
+                        Uniquely mapped reads % | 0.97%
+                          Average mapped length | 105.11
+                       Number of splices: Total | 131
             Number of splices: Annotated (sjdb) | 0
-                       Number of splices: GT/AG | 52
-                       Number of splices: GC/AG | 0
-                       Number of splices: AT/AC | 0
-               Number of splices: Non-canonical | 0
-                      Mismatch rate per base, % | 2.00%
-                         Deletion rate per base | 0.00%
-                        Deletion average length | 0.00
-                        Insertion rate per base | 0.00%
-                       Insertion average length | 0.00
+                       Number of splices: GT/AG | 100
+                       Number of splices: GC/AG | 6
+                       Number of splices: AT/AC | 1
+               Number of splices: Non-canonical | 24
+                      Mismatch rate per base, % | 6.68%
+                         Deletion rate per base | 0.04%
+                        Deletion average length | 1.17
+                        Insertion rate per base | 0.04%
+                       Insertion average length | 1.50
                              MULTI-MAPPING READS:
-        Number of reads mapped to multiple loci | 1
-             % of reads mapped to multiple loci | 1.00%
-        Number of reads mapped to too many loci | 0
-             % of reads mapped to too many loci | 0.00%
+        Number of reads mapped to multiple loci | 451
+             % of reads mapped to multiple loci | 2.92%
+        Number of reads mapped to too many loci | 58
+             % of reads mapped to too many loci | 0.38%
                                   UNMAPPED READS:
+  Number of reads unmapped: too many mismatches | 0
        % of reads unmapped: too many mismatches | 0.00%
-                 % of reads unmapped: too short | 0.00%
-                     % of reads unmapped: other | 0.00%
+            Number of reads unmapped: too short | 7912
+                 % of reads unmapped: too short | 51.22%
+                Number of reads unmapped: other | 6876
+                     % of reads unmapped: other | 44.51%
                                   CHIMERIC READS:
                        Number of chimeric reads | 0
                             % of chimeric reads | 0.00%
b
diff -r b0f2be869d6d -r 4014de1b6daf test-data/rnastar_test_splicejunctions_wasp.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rnastar_test_splicejunctions_wasp.bed Tue Aug 27 14:11:16 2024 +0000
b
@@ -0,0 +1,193 @@
+21 13873 52149 1 1 0 0 1 12
+21 13882 20712 2 2 0 0 3 28
+21 13882 29808 2 2 0 0 1 28
+21 13882 35980 2 2 0 0 1 26
+21 13882 50180 2 2 0 0 1 19
+21 13887 27728 1 1 0 0 1 21
+21 13887 35985 1 1 0 0 1 15
+21 13887 51866 1 1 0 0 4 40
+21 13887 62408 1 1 0 0 2 24
+21 13887 88031 1 1 0 0 2 40
+21 13887 152470 1 1 0 2 2 40
+21 13894 50193 1 1 0 0 1 24
+21 13907 27883 2 2 0 0 2 45
+21 13976 19932 2 2 0 0 4 41
+21 13976 88119 2 2 0 0 2 33
+21 13976 108267 2 2 0 0 2 33
+21 13991 30873 2 2 0 0 1 18
+21 13991 52131 2 2 0 0 1 18
+21 15394 46773 1 1 0 0 1 39
+21 18885 50205 2 4 0 0 1 35
+21 18949 28120 1 1 0 0 1 26
+21 18953 27816 2 2 0 0 1 16
+21 18953 37719 2 2 0 1 0 34
+21 19001 46120 2 2 0 0 1 17
+21 19001 52299 2 2 0 0 1 17
+21 19001 63039 2 2 0 0 1 17
+21 19533 60698 2 2 0 1 0 44
+21 19658 51267 1 1 0 0 1 45
+21 19737 28120 1 1 0 0 1 16
+21 19826 51848 1 1 0 1 1 36
+21 19830 37617 1 1 0 0 2 12
+21 19842 20715 1 1 0 0 1 23
+21 19842 51866 1 1 0 1 0 22
+21 19842 62903 1 1 0 0 1 18
+21 19842 88031 1 1 0 0 2 37
+21 20671 27816 2 2 0 0 1 20
+21 20671 52116 2 2 0 0 2 25
+21 20671 62856 2 2 0 0 3 23
+21 20704 30891 1 1 0 0 3 38
+21 20704 37617 1 1 0 0 2 12
+21 20704 52149 1 1 0 0 5 22
+21 20704 120492 1 1 0 0 2 38
+21 20765 51916 2 2 0 0 1 14
+21 20819 30873 2 2 0 0 1 18
+21 20819 52131 2 2 0 0 1 18
+21 25471 61751 2 2 0 0 1 12
+21 26064 51210 2 2 0 0 1 13
+21 27406 35153 1 1 0 0 1 28
+21 27406 51199 1 1 0 1 0 26
+21 27494 38948 2 2 0 0 1 33
+21 27494 46813 2 2 0 1 1 44
+21 27494 75795 2 2 0 0 1 39
+21 27684 52116 2 2 0 0 1 16
+21 27713 51848 1 1 0 0 1 40
+21 27846 29795 1 1 0 1 0 16
+21 27859 29808 2 2 0 0 1 20
+21 27864 35985 1 1 0 0 1 24
+21 27864 45984 1 1 0 0 1 24
+21 27864 51866 1 1 0 1 1 32
+21 27864 62408 1 1 0 0 2 41
+21 27864 74425 1 1 0 0 1 24
+21 27864 88031 1 1 0 1 1 36
+21 27935 30198 1 1 0 0 1 33
+21 27935 62479 1 1 0 0 1 27
+21 27951 88119 2 2 0 0 2 31
+21 28186 46133 1 1 0 0 2 18
+21 28186 63052 1 1 0 1 1 25
+21 29614 31023 1 1 0 0 1 12
+21 29614 63020 1 1 0 0 2 24
+21 29901 37719 2 2 0 1 6 28
+21 29901 88119 2 2 0 0 5 36
+21 29901 91677 2 2 0 0 3 24
+21 29901 108267 2 2 0 0 5 36
+21 29916 30873 2 2 0 0 1 18
+21 29916 52131 2 2 0 0 1 18
+21 29949 52299 2 2 0 0 2 18
+21 30122 62898 2 2 0 0 1 19
+21 30127 37629 1 1 0 0 1 24
+21 30127 51866 1 1 0 0 2 15
+21 30127 88031 1 1 0 1 4 28
+21 30901 62898 2 2 0 0 1 31
+21 30906 51866 1 1 0 0 1 36
+21 30906 62408 1 1 0 0 1 14
+21 35204 75757 1 1 0 1 0 43
+21 35246 51292 1 1 0 0 1 33
+21 35288 38995 1 1 0 0 1 22
+21 35986 45984 1 1 0 0 2 17
+21 35986 88031 1 1 0 0 3 32
+21 35994 50193 1 1 0 0 2 25
+21 35994 51874 1 1 0 0 3 24
+21 35994 74433 1 1 0 0 2 23
+21 36035 51916 2 2 0 0 1 14
+21 36066 37411 1 1 0 0 1 37
+21 36074 46072 2 4 0 0 1 33
+21 37630 51866 1 1 0 0 1 37
+21 37630 62903 1 1 0 0 3 12
+21 37630 81353 1 1 0 0 1 44
+21 37679 51916 2 2 0 0 1 14
+21 38899 46764 1 1 0 0 1 20
+21 38954 51292 1 1 0 0 1 43
+21 38981 75826 1 1 0 0 1 34
+21 38996 60894 1 1 0 0 1 22
+21 45985 88031 1 1 0 0 1 38
+21 46057 62479 1 1 0 0 1 14
+21 46065 143522 1 1 0 0 2 45
+21 46069 88115 1 1 0 1 2 41
+21 46073 88119 2 2 0 0 1 29
+21 46814 70282 2 2 0 0 1 32
+21 46870 51210 2 2 0 1 0 21
+21 50168 62885 1 1 0 0 1 18
+21 50168 81335 1 1 0 0 1 34
+21 50244 137072 2 2 0 0 2 35
+21 50274 88119 2 2 0 0 1 39
+21 50274 91677 2 2 0 0 1 13
+21 50658 51848 1 1 0 1 0 42
+21 51216 75722 2 2 0 0 2 21
+21 51280 56626 2 2 0 0 1 19
+21 51288 75795 2 2 0 0 1 22
+21 51288 149429 2 2 0 0 2 41
+21 51293 60852 1 1 0 1 1 29
+21 51293 117556 1 1 0 0 2 44
+21 51302 60411 2 2 0 0 1 33
+21 51302 60861 2 2 0 0 2 42
+21 51320 75826 1 1 0 0 1 34
+21 51849 62390 1 1 0 0 1 37
+21 51849 62885 1 1 0 1 0 18
+21 51853 52149 1 1 0 0 1 12
+21 51899 52192 1 1 0 0 1 22
+21 51899 120538 1 1 0 1 4 32
+21 51955 206521 2 2 0 1 2 27
+21 52117 62856 2 2 0 0 3 18
+21 52162 62903 1 1 0 1 0 14
+21 52162 81353 1 1 0 0 1 44
+21 52162 88031 1 1 0 1 5 44
+21 52162 98665 1 1 0 0 1 36
+21 52162 152470 1 1 0 0 3 44
+21 52162 289491 1 1 0 1 10 36
+21 52182 62923 2 4 0 0 1 13
+21 52252 62856 2 2 0 1 0 21
+21 52252 88119 2 2 0 0 1 30
+21 52329 63068 1 1 0 0 1 28
+21 57410 60698 2 2 0 0 1 29
+21 57410 75645 2 2 0 0 2 38
+21 57475 60763 2 2 0 0 1 36
+21 60286 60730 2 4 0 1 0 40
+21 60390 60840 2 2 0 0 1 24
+21 60699 75645 2 2 0 0 2 27
+21 62480 98737 1 1 0 0 2 12
+21 62496 108267 2 2 0 0 1 43
+21 62544 63039 2 2 0 0 2 40
+21 62557 63052 1 1 0 0 1 15
+21 62890 120492 1 1 0 0 2 42
+21 62899 81348 2 2 0 0 1 24
+21 62899 108178 2 2 0 0 2 20
+21 62904 88031 1 1 0 1 4 44
+21 62936 120538 1 1 0 0 4 38
+21 62976 80498 1 1 0 1 0 24
+21 62976 137268 1 1 0 0 2 33
+21 63053 81502 1 1 0 0 1 32
+21 63053 88180 1 1 0 0 1 43
+21 70140 75651 2 2 0 0 1 31
+21 70283 75795 2 2 0 0 1 15
+21 70283 149429 2 2 0 2 1 45
+21 74506 143522 1 1 0 0 3 45
+21 74510 88115 1 1 0 0 2 35
+21 75799 117556 1 1 0 1 0 37
+21 75818 117576 2 2 0 0 1 15
+21 76355 87984 2 2 0 0 1 16
+21 76378 91540 1 1 0 0 1 24
+21 81349 115881 2 2 0 0 1 37
+21 81354 115886 1 1 0 0 1 33
+21 81438 88115 1 1 0 0 1 36
+21 88064 120538 1 1 0 1 5 32
+21 88120 91677 2 2 0 0 1 26
+21 91565 115886 1 1 0 0 2 33
+21 91597 120538 1 1 0 0 1 25
+21 98666 143455 1 1 0 1 0 18
+21 98750 138516 1 1 0 0 1 14
+21 108179 115881 2 2 0 0 1 38
+21 108223 152509 2 2 0 0 1 25
+21 112412 138384 2 2 0 0 1 16
+21 115887 152470 1 1 0 0 1 22
+21 115919 120538 1 1 0 0 7 43
+21 126053 137131 1 1 0 0 1 26
+21 131927 138384 2 2 0 0 1 21
+21 136828 136968 2 2 0 0 2 26
+21 137014 289491 1 1 0 0 6 25
+21 137087 137268 1 1 0 0 1 14
+21 137346 138586 1 1 0 0 1 19
+21 138427 152465 2 2 0 1 6 40
+21 138432 152470 1 1 0 0 2 31
+21 138513 143522 1 1 0 0 2 45
+21 682607 689282 2 2 0 1 0 36