Repository 'lumpy_sv'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/lumpy_sv

Changeset 6:48e97429a749 (2024-07-08)
Previous changeset 5:6ae3a402b9af (2020-05-24)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_sv commit 37c0f0938a5cad74c954a89827a6a08bbbb81cb0
modified:
extractSplitReads_BwaMem.py
lumpy.xml
test-data/output_extended.vcf
b
diff -r 6ae3a402b9af -r 48e97429a749 extractSplitReads_BwaMem.py
--- a/extractSplitReads_BwaMem.py Sun May 24 18:48:46 2020 -0400
+++ b/extractSplitReads_BwaMem.py Mon Jul 08 22:23:10 2024 +0000
[
@@ -21,7 +21,7 @@
             continue
         for el in sam.tags:
             if "SA:" in el:
-                if(len(el.split(";"))) <= numSplits:
+                if (len(el.split(";"))) <= numSplits:
                     split = 1
                     mate = el.split(",")
                     mateCigar = mate[3]
@@ -103,7 +103,6 @@
         cigarOps = []
         for opString in cigarOpStrings:
             cigarOpList = atomicCigarSearch.findall(opString)
-#            print cigarOpList
             # "struct" for the op and it's length
             cigar = cigarOp(cigarOpList[0][0], cigarOpList[0][1])
             # add to the list of cigarOps
@@ -120,8 +119,7 @@
             cigar = cigarOp(cigarOpList[0][0], cigarOpList[0][1])
             # add to the list of cigarOps
             cigarOps.append(cigar)
-#            cigarOps = cigarOps
-    return(cigarOps)
+    return cigarOps
 
 
 def calcQueryPosFromCigar(cigarOps):
@@ -202,7 +200,8 @@
                       help='''Include alignments marked as duplicates.
                       Default=False''')
     parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap",
-                      default=20, type="int", help='''minimum non-overlap between
+                      default=20, type="int",
+                      help='''minimum non-overlap between
                       split alignments on the query (default=20)''',
                       metavar="INT")
     (opts, args) = parser.parse_args()
b
diff -r 6ae3a402b9af -r 48e97429a749 lumpy.xml
--- a/lumpy.xml Sun May 24 18:48:46 2020 -0400
+++ b/lumpy.xml Mon Jul 08 22:23:10 2024 +0000
[
b'@@ -1,9 +1,9 @@\n-<tool id="lumpy" name="lumpy-sv" version="1.2.2">\n+<tool id="lumpy" name="lumpy-sv" version="1.3">\n     <description>find structural variants</description>\n     <requirements>\n-        <requirement type="package" version="0.2.13">lumpy-sv</requirement>\n-        <requirement type="package" version="1.3.1">samtools</requirement>\n-        <requirement type="package" version="1.11.2=py27_0">numpy</requirement>\n+        <requirement type="package" version="0.3.1">lumpy-sv</requirement>\n+        <requirement type="package" version="1.18">samtools</requirement>\n+        <requirement type="package" version="1.16.5">numpy</requirement>\n     </requirements>\n     <stdio>\n         <exit_code range="1:" level="fatal" description="Tool exception" />\n@@ -33,7 +33,7 @@\n                     |python $__tool_directory__/pairend_distro.py -r $analysis_type.readLength -X 4 -N $seq_method.additional_params.samplingValue -o input.lib.histo > meandev.txt &&\n                 mean=\\$(cat meandev.txt | sed s/mean:// | sed -r s/stdev:.+//) &&\n                 stdev=\\$(cat meandev.txt | sed -r s/mean:.+stdev://) &&\n-                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt\n+                lumpy $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt\n                     #if $output_format == "BEDPE":\n                         -b\n                     #end if\n@@ -41,7 +41,7 @@\n                     -sr id:\'$one_sample_bam\',bam_file:input.splitters.bam,back_distance:$seq_method.additional_params.back_distance,weight:$seq_method.additional_params.weight,min_mapping_threshold:$seq_method.additional_params.min_mapping_threshold > \'$vcf_call\'\n             #elif $seq_method.seq_method_list == "single-read":\n                 samtools view -@ \\${GALAXY_SLOTS:-4} -h \'$one_sample_bam\' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -@ \\${GALAXY_SLOTS:-4} -O bam -o input.splitters.bam &&\n-                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt\n+                lumpy $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt\n                     #if $output_format == "BEDPE":\n                         -b\n                     #end if\n@@ -61,7 +61,7 @@\n                 meanB=\\$(cat meandevB.txt | sed s/mean:// | sed -r s/stdev:.+//) &&\n                 stdevA=\\$(cat meandevA.txt | sed -r s/mean:.+stdev://) &&\n                 stdevB=\\$(cat meandevB.txt | sed -r s/mean:.+stdev://) &&\n-                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt\n+                lumpy $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt\n                     #if $output_format == "BEDPE":\n                         -b\n                     #end if\n@@ -72,7 +72,7 @@\n             #elif $seq_method.seq_method_list == "single-read":\n                 samtools view -@ \\${GALAXY_SLOTS:-4} -h \'$sample_a_bam\' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -@ \\${GALAXY_SLOTS:-4} -O bam -o input.splitters.bam &&\n                 samtools view -@ \\${GALAXY_SLOTS:-4} -h \'$sample_b_bam\' | python $__tool_directory__/extractSplitReads_BwaMem.py -i stdin | samtools sort -@ \\${GALAXY_SLOTS:-4} -O bam -o input.B.splitters.bam &&\n-                lumpy $seq_method.additional_params.evidence $seq_method.additional_params.probability_curve -mw $seq_method.additional_params.mw -tt $seq_method.additional_params.tt\n+                lumpy $seq_method.additional_params.probab'..b'             </when>\n \n@@ -139,21 +137,21 @@\n     </inputs>\n \n     <outputs>\n-        <data format="tabular" name="histogram" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.lib.histo">\n+        <data format="tabular" name="histogram" label="Lumpy-sv: Fragment size distribution" from_work_dir="input.lib.histo">\n             <filter>seq_method[\'seq_method_list\'] == "paired-end"</filter>\n         </data>\n-        <data format="tabular" name="histogramB" label="Lumpy on ${on_string}: Fragment size distribution" from_work_dir="input.B.lib.histo">\n+        <data format="tabular" name="histogramB" label="Lumpy-sv: FragmentB size distribution" from_work_dir="input.B.lib.histo">\n             <filter>seq_method[\'seq_method_list\'] == "paired-end"</filter>\n             <filter>analysis_type[\'analysis_type_list\'] == "two_sample"</filter>\n         </data>\n         <data format="bam" name="splits" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.splitters.bam"/>\n-        <data format="bam" name="splitsB" label="Lumpy on ${on_string}: Split Reads (Bam format)" from_work_dir="input.B.splitters.bam">\n+        <data format="bam" name="splitsB" label="Lumpy on ${on_string}: SplitB Reads (Bam format)" from_work_dir="input.B.splitters.bam">\n             <filter>analysis_type[\'analysis_type_list\'] == "two_sample"</filter>\n         </data>\n         <data format="bam" name="discordants" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.discordants.bam">\n             <filter>seq_method[\'seq_method_list\'] == "paired-end"</filter>\n         </data>\n-        <data format="bam" name="discordantsB" label="Lumpy on ${on_string}: Discordant Pairs (Bam format)" from_work_dir="input.B.discordants.bam">\n+        <data format="bam" name="discordantsB" label="Lumpy on ${on_string}: DiscordantB Pairs (Bam format)" from_work_dir="input.B.discordants.bam">\n             <filter>seq_method[\'seq_method_list\'] == "paired-end"</filter>\n             <filter>analysis_type[\'analysis_type_list\'] == "two_sample"</filter>\n         </data>\n@@ -165,7 +163,7 @@\n     </outputs>\n \n     <tests>\n-        <test>\n+        <test expect_num_outputs="7">\n             <param name="analysis_type_list" value="two_sample" />\n             <param name="input_file" value="sample_Del.bam" ftype="bam"/>\n             <param name="input_fileB" value="sample_cle.bam" ftype="bam"/>\n@@ -177,7 +175,7 @@\n             <param name="min_mapping_threshold" value="20" />\n             <output name="vcf_call" file="output_two.paired_end.vcf" ftype="vcf"/>\n         </test>\n-        <test>\n+        <test expect_num_outputs="2">\n             <param name="analysis_type_list" value="one_sample" />\n             <param name="input_file" value="sr.input.bam" ftype="bam"/>\n             <param name="seq_method_list" value="single-read" />\n@@ -188,7 +186,7 @@\n             <param name="min_mapping_threshold" value="20" />\n             <output name="vcf_call" file="output.vcf" ftype="vcf"/>\n         </test>\n-        <test>\n+        <test expect_num_outputs="2">\n             <param name="analysis_type_list" value="one_sample" />\n             <param name="input_file" value="sr.input.bam" ftype="bam"/>\n             <param name="seq_method_list" value="single-read" />\n@@ -197,11 +195,10 @@\n             <param name="back_distance" value="10"/>\n             <param name="weight" value="1" />\n             <param name="min_mapping_threshold" value="20" />\n-            <param name="evidence" value="true" />\n             <param name="probability_curve" value="true" />\n             <output name="vcf_call" file="output_extended.vcf" ftype="vcf" compare="sim_size"/>\n         </test>\n-        <test>\n+        <test expect_num_outputs="3">\n             <param name="analysis_type_list" value="two_sample" />\n             <param name="input_file" value="sr.input.bam" ftype="bam"/>\n             <param name="input_fileB" value="sr.input.bam" ftype="bam"/>\n'
b
diff -r 6ae3a402b9af -r 48e97429a749 test-data/output_extended.vcf
--- a/test-data/output_extended.vcf Sun May 24 18:48:46 2020 -0400
+++ b/test-data/output_extended.vcf Mon Jul 08 22:23:10 2024 +0000
[
@@ -31,45 +31,7 @@
 ##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
 ##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sr_input_bam
- Evidence: M00860:26:000000000-A6UGV:1:1101:22421:6659_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1885 1932 0x17456f0 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:21503:6234_2 hg38_gold_U07000.1 13 52 hg38_gold_U07000.1 1885 1933 0x17451d0 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:14108:4338_2 hg38_gold_U07000.1 8 53 hg38_gold_U07000.1 1879 1932 0x1747410 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:22281:3587_2 hg38_gold_U07000.1 8 52 hg38_gold_U07000.1 1879 1944 0x174d920 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:9129:3504_2 hg38_gold_U07000.1 13 50 hg38_gold_U07000.1 1872 1932 0x1748e60 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:10154:5887_2 hg38_gold_U07000.1 10 41 hg38_gold_U07000.1 1872 1931 0x17499d0 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:9382:5932_2 hg38_gold_U07000.1 8 53 hg38_gold_U07000.1 1871 1921 0x174d580 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:22747:6442_2 hg38_gold_U07000.1 8 52 hg38_gold_U07000.1 1870 1933 0x1748cc0 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:6411:6050_2 hg38_gold_U07000.1 13 49 hg38_gold_U07000.1 1868 1932 0x17486a0 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:23865:5544_2 hg38_gold_U07000.1 13 56 hg38_gold_U07000.1 1868 1923 0x1748480 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:20349:5252_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1868 1931 0x174ce80 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:22571:4203_2 hg38_gold_U07000.1 13 50 hg38_gold_U07000.1 1868 1926 0x1745050 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:16719:4051_2 hg38_gold_U07000.1 8 51 hg38_gold_U07000.1 1868 1932 0x1747b70 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:8961:3182_2 hg38_gold_U07000.1 15 51 hg38_gold_U07000.1 1868 1917 0x1744130 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:18427:2937_2 hg38_gold_U07000.1 8 41 hg38_gold_U07000.1 1868 1932 0x174b760 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:21139:5913_2 hg38_gold_U07000.1 12 50 hg38_gold_U07000.1 1868 1924 0x174d640 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:10523:5853_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1868 1935 0x174de60 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:16433:5445_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1868 1932 0x174d870 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:12490:4165_2 hg38_gold_U07000.1 13 58 hg38_gold_U07000.1 1868 1917 0x174d9f0 0 - + id:1 weight:1
 hg38_gold_U07000.1 14 1_1 N [hg38_gold_U07000.1:1876[N . . SVTYPE=BND;STRANDS=--:19;EVENT=1;MATEID=1_2;CIPOS=0,0;CIEND=0,2;CIPOS95=0,0;CIEND95=0,0;SU=19;SR=19;PRPOS=1;PREND=0.99996,3.98091e-05,1.58483e-09 GT:SU:SR ./.:19:19
 hg38_gold_U07000.1 1876 1_2 N [hg38_gold_U07000.1:14[N . . SVTYPE=BND;STRANDS=--:19;SECONDARY;EVENT=1;MATEID=1_1;CIPOS=0,2;CIEND=0,0;CIPOS95=0,0;CIEND95=0,0;SU=19;SR=19;PRPOS=0.99996,3.98091e-05,1.58483e-09;PREND=1 GT:SU:SR ./.:19:19
- Evidence: M00860:26:000000000-A6UGV:1:1101:7043:5583_2 hg38_gold_U07000.1 13 56 hg38_gold_U07000.1 1899 1935 0x1742010 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:19472:2185_2 hg38_gold_U07000.1 0 56 hg38_gold_U07000.1 1898 1945 0x1744270 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:9324:6462_2 hg38_gold_U07000.1 0 51 hg38_gold_U07000.1 1898 1933 0x1748fd0 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:23764:6273_2 hg38_gold_U07000.1 13 48 hg38_gold_U07000.1 1898 1945 0x17490a0 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:7772:5906_2 hg38_gold_U07000.1 0 54 hg38_gold_U07000.1 1898 1942 0x1747240 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:6971:4906_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1898 1935 0x1746170 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:10511:4776_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1898 1934 0x174a840 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:19677:3538_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1898 1940 0x174c190 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:22109:4922_2 hg38_gold_U07000.1 13 53 hg38_gold_U07000.1 1897 1933 0x1743ac0 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:14027:4470_2 hg38_gold_U07000.1 0 51 hg38_gold_U07000.1 1896 1933 0x174b500 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:22736:5607_2 hg38_gold_U07000.1 13 57 hg38_gold_U07000.1 1893 1945 0x174cf30 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:10768:5508_2 hg38_gold_U07000.1 13 56 hg38_gold_U07000.1 1893 1935 0x17480b0 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:14449:3437_2 hg38_gold_U07000.1 13 48 hg38_gold_U07000.1 1893 1933 0x174b150 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:18009:3360_2 hg38_gold_U07000.1 13 47 hg38_gold_U07000.1 1893 1925 0x1749390 0 + - id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:16615:6513_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1893 1931 0x174b860 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:19235:5076_2 hg38_gold_U07000.1 13 45 hg38_gold_U07000.1 1893 1932 0x174a790 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:14629:4828_2 hg38_gold_U07000.1 8 50 hg38_gold_U07000.1 1893 1932 0x174b360 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:22780:4994_2 hg38_gold_U07000.1 13 50 hg38_gold_U07000.1 1892 1924 0x174aaf0 0 - + id:1 weight:1
- Evidence: M00860:26:000000000-A6UGV:1:1101:12387:3929_2 hg38_gold_U07000.1 13 51 hg38_gold_U07000.1 1892 1931 0x174c000 0 - + id:1 weight:1
 hg38_gold_U07000.1 10 2_1 N [hg38_gold_U07000.1:1897[N . . SVTYPE=BND;STRANDS=--:19;EVENT=2;MATEID=2_2;CIPOS=-1,0;CIEND=-7,5;CIPOS95=0,1;CIEND95=-2,1;IMPRECISE;SU=19;SR=19;PRPOS=9.99999e-13,9.99999e-07;PREND=4.89496e-31,1.94872e-23,7.75799e-16,3.08851e-08,0.0308851,0.0775799,0.194872,0.489496,0.194872,0.0122956,1.94872e-09,4.89496e-17,1.22956e-24 GT:SU:SR ./.:19:19
 hg38_gold_U07000.1 1897 2_2 N [hg38_gold_U07000.1:10[N . . SVTYPE=BND;STRANDS=--:19;SECONDARY;EVENT=2;MATEID=2_1;CIPOS=-7,5;CIEND=-1,0;CIPOS95=-2,1;CIEND95=0,1;IMPRECISE;SU=19;SR=19;PRPOS=4.89496e-31,1.94872e-23,7.75799e-16,3.08851e-08,0.0308851,0.0775799,0.194872,0.489496,0.194872,0.0122956,1.94872e-09,4.89496e-17,1.22956e-24;PREND=9.99999e-13,9.99999e-07 GT:SU:SR ./.:19:19