Previous changeset 2:6a87478ed985 (2020-06-18) Next changeset 4:a1b70f038b4a (2020-10-13) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit e80b649094384fc6d7a8f917300db3550cc99a44" |
modified:
macros.xml medaka_consensus.xml test-data/basecalls.fastq test-data/variants.vcf.gz |
added:
annotateVCF.py test-data/annotate_vcf_test.pileup test-data/annotate_vcf_test.vcf test-data/annotated.vcf test-data/bwa-mem-mt-genome.fa.fai test-data/medaka_test.bam test-data/medaka_test.bam.bai test-data/medaka_test.hdf test-data/ref.fasta test-data/ref.fasta.fai |
b |
diff -r 6a87478ed985 -r 35666d44fe7a annotateVCF.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annotateVCF.py Tue Sep 01 03:08:04 2020 -0400 |
[ |
b'@@ -0,0 +1,398 @@\n+#!/usr/bin/env python3\n+\n+# Takes in VCF file and a samtools mpileup output file\n+# Fills in annotation for the VCF file including AF, DP\n+# SB, and DP4\n+#\n+# Usage statement:\n+# python annotateVCF.py in_vcf.vcf in_mpileup.txt out_vcf.vcf\n+#\n+# Can generate in_mileup.txt with samtools mpileup (and can restrict which sites to generate pileups for with in_vcf.vcf)\n+\n+# 08/24/2020 - Nathan P. Roach, natproach@gmail.com\n+\n+import sys\n+from math import isnan, log10\n+\n+from scipy.stats import fisher_exact\n+\n+\n+def pval_to_phredqual(pval):\n+ return int(round(-10. * log10(pval)))\n+\n+\n+def parseSimpleSNPpileup(fields, ref_base, alt_base):\n+ base_to_idx = {\n+ \'A\': 0,\n+ \'a\': 0,\n+ \'T\': 1,\n+ \'t\': 1,\n+ \'C\': 2,\n+ \'c\': 2,\n+ \'G\': 3,\n+ \'g\': 3\n+ }\n+\n+ base_to_idx_stranded = {\n+ \'A\': 0,\n+ \'T\': 1,\n+ \'C\': 2,\n+ \'G\': 3,\n+ \'a\': 4,\n+ \'t\': 5,\n+ \'c\': 6,\n+ \'g\': 7\n+ }\n+ ref_base2 = fields[2]\n+ counts = [0, 0, 0, 0]\n+ stranded_counts = [0, 0, 0, 0, 0, 0, 0, 0]\n+ ref_idx = base_to_idx[fields[2]]\n+ dp = int(fields[3])\n+ carrot_flag = False\n+ ins_flag = False\n+ ins_str = ""\n+ ins_len = 0\n+ insertion = ""\n+ del_flag = False\n+ del_str = ""\n+ del_len = 0\n+ deletion = ""\n+ # dollar_flag = False\n+ for base in fields[4]:\n+ if carrot_flag:\n+ carrot_flag = False\n+ continue\n+ if ins_len > 0:\n+ insertion += base\n+ ins_len -= 1\n+ continue\n+ if del_len > 0:\n+ deletion += base\n+ del_len -= 1\n+ continue\n+ if ins_flag:\n+ if base.isdigit():\n+ ins_str += base\n+ else:\n+ ins_len = int(ins_str) - 1\n+ insertion = base\n+ ins_flag = False\n+ elif del_flag:\n+ if base.isdigit():\n+ del_str += base\n+ else:\n+ del_len = int(del_str) - 1\n+ deletion = base\n+ del_flag = False\n+ else:\n+ if base == \'^\':\n+ carrot_flag = True\n+ continue\n+ elif base == \'$\':\n+ continue\n+ elif base == \'+\':\n+ ins_flag = True\n+ elif base == \'-\':\n+ del_flag = True\n+ elif base == \'.\':\n+ counts[ref_idx] += 1\n+ stranded_counts[base_to_idx_stranded[ref_base2]] += 1\n+ elif base == \',\':\n+ counts[ref_idx] += 1\n+ stranded_counts[base_to_idx_stranded[ref_base2.lower()]] += 1\n+ elif base == \'N\' or base == \'n\':\n+ continue\n+ elif base == \'*\':\n+ continue\n+ else:\n+ counts[base_to_idx[base]] += 1\n+ stranded_counts[base_to_idx_stranded[base]] += 1\n+ af = float(counts[base_to_idx[alt_base]]) / float(sum(counts))\n+ if float(sum(stranded_counts[0:4])) == 0:\n+ faf = float("nan")\n+ else:\n+ faf = float(stranded_counts[base_to_idx_stranded[alt_base]]) / float(sum(stranded_counts[0:4]))\n+ if float(sum(stranded_counts[4:])) == 0:\n+ raf = float("nan")\n+ else:\n+ raf = float(stranded_counts[base_to_idx_stranded[alt_base.lower()]]) / float(sum(stranded_counts[4:]))\n+ dp4 = [stranded_counts[base_to_idx_stranded[ref_base]],\n+ stranded_counts[base_to_idx_stranded[ref_base.lower()]],\n+ stranded_counts[base_to_idx_stranded[alt_base]],\n+ stranded_counts[base_to_idx_stranded[alt_base.lower()]]]\n+ return (dp, af, faf, raf, dp4)\n+\n+\n+def parseIndelPileup(fields, ref_base, alt_base):\n+ counts = [0, 0, 0, 0, 0, 0, 0, 0, 0] # indel ref match, indel fwd ref match, indel rev ref match, indel alt match, indel fwd alt match, indel rev alt match, other, other fwd, other rev\n+ ref_base2 = fields[2]\n+\n+ carrot_'..b'xamine = {}\n+ for line in in_vcf:\n+ if line[0:2] == "##":\n+ out_vcf.write(line)\n+ elif line[0] == "#":\n+ out_vcf.write("##annotateVCFVersion=0.1\\n")\n+ out_vcf.write("##INFO=<ID=DP,Number=1,Type=Integer,Description=\\"Raw Depth\\">\\n")\n+ out_vcf.write("##INFO=<ID=AF,Number=1,Type=Float,Description=\\"Allele Frequency\\">\\n")\n+ out_vcf.write("##INFO=<ID=FAF,Number=1,Type=Float,Description=\\"Forward Allele Frequency\\">\\n")\n+ out_vcf.write("##INFO=<ID=RAF,Number=1,Type=Float,Description=\\"Reverse Allele Frequency\\">\\n")\n+ out_vcf.write("##INFO=<ID=SB,Number=1,Type=Integer,Description=\\"Phred-scaled strand bias at this position\\">\\n")\n+ out_vcf.write("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\\">\\n")\n+ out_vcf.write(line)\n+ else:\n+ fields = line.strip().split()\n+ if fields[0] in to_examine:\n+ to_examine[fields[0]][int(fields[1])] = (fields[3], fields[4])\n+ else:\n+ to_examine[fields[0]] = {int(fields[1]): (fields[3], fields[4])}\n+ in_vcf.close()\n+ data = {}\n+\n+ # Populate data dictionary, which relates chromosome and position to the following:\n+ # depth of coverage\n+ # allele frequency\n+ # forward strand allele frequency\n+ # reverse strand allele frequency\n+ # dp4 - depth of coverage of ref allele fwd strand, DOC of ref allele rev strand, DOC of alt allele fwd strand, DOC of alt allele rev strand\n+ for line in in_mpileup:\n+ fields = line.strip().split()\n+ if fields[0] not in to_examine:\n+ continue\n+ if int(fields[1]) not in to_examine[fields[0]]:\n+ continue\n+ (ref_base, alt_base) = to_examine[fields[0]][int(fields[1])]\n+ if len(ref_base.split(\',\')) > 1: # Can\'t handle multiple ref alleles\n+ continue\n+ if len(alt_base.split(\',\')) > 1: # Can\'t handle multiple alt alleles\n+ continue\n+ if len(ref_base) > 1 or len(alt_base) > 1:\n+ if len(ref_base) > 1 and len(alt_base) > 1: # Can\'t handle complex indels\n+ continue\n+ data[(fields[0], int(fields[1]))] = parseIndelPileup(fields, ref_base, alt_base)\n+ if len(ref_base) == 1 and len(alt_base) == 1:\n+ data[(fields[0], int(fields[1]))] = parseSimpleSNPpileup(fields, ref_base, alt_base)\n+ in_mpileup.close()\n+ # Reopen vcf, this time, skip header, annotate all the sites for which there is an entry in data dictionary\n+ # (Sites without entries have either multiple ref or alt bases, or have complex indels. Not supported (for now), and not reported as a result)\n+ in_vcf = open(in_vcf_filepath, \'r\')\n+ for line in in_vcf:\n+ if line[0] == \'#\':\n+ continue\n+ fields = line.strip().split(\'\\t\')\n+ if (fields[0], int(fields[1])) not in data:\n+ continue\n+ (dp, af, faf, raf, dp4) = data[(fields[0], int(fields[1]))]\n+ dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]\n+ _, p_val = fisher_exact(dp2x2)\n+ sb = pval_to_phredqual(p_val)\n+ if fields[7] == "":\n+ info = []\n+ else:\n+ info = fields[7].split(\';\')\n+ info.append("DP=%d" % (dp))\n+ info.append("AF=%.6f" % (af))\n+ if isnan(faf):\n+ info.append("FAF=NaN")\n+ else:\n+ info.append("FAF=%.6f" % (faf))\n+ if isnan(raf):\n+ info.append("RAF=NaN")\n+ else:\n+ info.append("RAF=%.6f" % (raf))\n+ info.append("SB=%d" % (sb))\n+ info.append("DP4=%s" % (\',\'.join([str(x) for x in dp4])))\n+ new_info = \';\'.join(info)\n+ fields[7] = new_info\n+ out_vcf.write("%s\\n" % ("\\t".join(fields)))\n+ in_vcf.close()\n+ out_vcf.close()\n+\n+\n+if __name__ == "__main__":\n+ annotateVCF(sys.argv[1], sys.argv[2], sys.argv[3])\n' |
b |
diff -r 6a87478ed985 -r 35666d44fe7a macros.xml --- a/macros.xml Thu Jun 18 05:07:32 2020 -0400 +++ b/macros.xml Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -5,6 +5,7 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">medaka</requirement> + <requirement type="package" version="1.4.1">scipy</requirement> </requirements> </xml> <xml name="version_command"> |
b |
diff -r 6a87478ed985 -r 35666d44fe7a medaka_consensus.xml --- a/medaka_consensus.xml Thu Jun 18 05:07:32 2020 -0400 +++ b/medaka_consensus.xml Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="medaka_consensus_pipeline" name="medaka: Consensus pipeline" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@"> +<tool id="medaka_consensus_pipeline" name="medaka: Consensus pipeline" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@"> <description>Assembly polishing via neural networks</description> <macros> <import>macros.xml</import> @@ -19,7 +19,7 @@ -i '$i' -d '$d' -|& tee '$out_log' +2>&1 | tee '$out_log' ]]></command> <inputs> <param argument="-i" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fasta,fasta.gz" label="Select basecalls"/> @@ -76,12 +76,14 @@ </output> <output name="out_probs"> <assert_contents> - <has_size value="192187" delta="100"/> + <!-- <has_size value="192187" delta="100"/> --> + <has_size value="165271" delta="100"/> </assert_contents> </output> <output name="out_calls"> <assert_contents> - <has_size value="533242" delta="100"/> + <!-- <has_size value="533242" delta="100"/> --> + <has_size value="343197" delta="100"/> </assert_contents> </output> </test> @@ -96,24 +98,29 @@ <assert_contents> <has_n_lines n="65"/> <has_line line=">4 length=3792 depth=114.52x circular=true"/> - <has_line line="CATCTCTTT"/> + <has_line line="ATCTCTTT"/> </assert_contents> </output> <output name="out_probs"> <assert_contents> - <has_size value="192710" delta="100"/> + <has_size value="166251" delta="100"/> + <!-- <has_size value="192710" delta="100"/> --> </assert_contents> </output> <output name="out_calls"> <assert_contents> - <has_size value="533163" delta="100"/> + <has_size value="343197" delta="100"/> + <!-- <has_size value="533163" delta="100"/> --> </assert_contents> </output> <output name="out_draft"> <assert_contents> - <has_n_lines n="6"/> - <has_line line="chain 3789 4 3792 + 0 3792 4 3789 + 0 3789 1"/> - <has_line line="1204"/> + <has_line line="chain 3788 4 3792 + 0 3792 4 3788 + 0 3788 1"/> + <has_line line="1409 1 0"/> + <has_line line="440 1 0"/> + <has_line line="736 1 0"/> + <has_line line="853 1 0"/> + <has_line line="350"/> </assert_contents> </output> <output name="out_polished"> |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/annotate_vcf_test.pileup --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotate_vcf_test.pileup Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -0,0 +1,3 @@ +NC_045512.2 45 G 31 CCCCACCCCCCccccccccc,ccCCC*,+3ttccCC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +NC_045512.2 685 A 75 .-9AAGTCATTT,-9aagtcattt,-9aagtcattt*.-9AAGTCATTT*G.-9AAGTCATTT.-9AAGTCATTT,-9aagtcattt*,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt,,-9aagtcattt,-9aagtcattt,-9aagtcatttG.-9AAGTCATTT*,-9aagtcatttG.-9AAGTCATTT.-9AAGTCATTTG.-9AAGTCATTTG.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT*.-9AAGTCATTT*.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTTG.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT.-9AAGTCATTT*,-9aagtcattt,-9aagtcattt*,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt,-9aagtcattt*,-4aagt,-9aagtcattt,-9aagtcattt**G,-9aagtcattt,-9aagtcattt,-9aagtcatttG,-9aagtcattt,-9aagtcattt*,-9aagtcattt* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +NC_045512.2 1813 T 51 ..-3AAA..-2AA..-1A*.-1A.-3AAA*..-1A..-1A.-2AA.-1A.-3AAA.-2AA.-2AA.-3AAA.,-1a,-1a,-6aaaaaa,,,-1a,,-2aa,-2aa,,-1a,-1a,*,-1a,-1a,+1a,-2aa,-1a,+1a,-1a,,-2aa.-3AAA,-1a.-2AA,-1a*,-2aa,-2aa ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/annotate_vcf_test.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotate_vcf_test.vcf Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -0,0 +1,9 @@ +##fileformat=VCFv4.1 +##medaka_version=1.0.3 +##contig=<ID=NC_045512.2> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Medaka genotype."> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Medaka genotype quality score"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +NC_045512.2 45 . G C 46.359 PASS GT:GQ 1:46 +NC_045512.2 685 . AAAGTCATTT A 260.487 PASS GT:GQ 1:260 +NC_045512.2 1813 . TA T 11.034 PASS GT:GQ 1:11 |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/annotated.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated.vcf Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -0,0 +1,16 @@ +##fileformat=VCFv4.1 +##medaka_version=1.0.3 +##contig=<ID=NC_045512.2> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Medaka genotype."> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Medaka genotype quality score"> +##annotateVCFVersion=0.1 +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw Depth"> +##INFO=<ID=AF,Number=1,Type=Float,Description="Allele Frequency"> +##INFO=<ID=FAF,Number=1,Type=Float,Description="Forward Allele Frequency"> +##INFO=<ID=RAF,Number=1,Type=Float,Description="Reverse Allele Frequency"> +##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias at this position"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +NC_045512.2 45 . G C 46.359 PASS DP=31;AF=0.900000;FAF=0.937500;RAF=0.857143;SB=6;DP4=0,2,15,12 GT:GQ 1:46 +NC_045512.2 685 . AAAGTCATTT A 260.487 PASS DP=75;AF=0.880000;FAF=0.789474;RAF=0.972973;SB=0;DP4=0,0,30,36 GT:GQ 1:260 +NC_045512.2 1813 . TA T 11.034 PASS DP=51;AF=0.607843;FAF=0.521739;RAF=0.678571;SB=0;DP4=0,0,12,19 GT:GQ 1:11 |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/basecalls.fastq --- a/test-data/basecalls.fastq Thu Jun 18 05:07:32 2020 -0400 +++ b/test-data/basecalls.fastq Tue Sep 01 03:08:04 2020 -0400 |
b |
b"@@ -498,519 +498,3 @@\n GTTGTACTTCGTTCGGTTACGTATTGCTGCGCGCGCACTCTCTGTCGTCAACGGCGTCAGATATTATTATAAGAGACAGCTCCTTCACCAAGCATTGATATTTCATTTAATTCGCCAGTCCTATATAAAAAAAATCTCTTTGTTTGATCTGTTGCTATTATTGCAATATCGTTAATTCTATTATCTATTCTTTTTAGGTTACCTATTATTTCATCTTTTCTTTTTGTGTTAGTAGTTTTGATACGTTTTATCGTTGCTTCTTCATTTATGTTTCTTGATGTGGTTATTATCGCTTGCGGTTTTGAATTCATATAGCTTACCTTTTTTAGTTTGTTTTATTTCTCCGTTTATTTCTTGTGCTTTTTTGTGTTATTTTTATAGTAATCAATTCTTCTGGGGGTTCTTTTTCCGTTCTGTGTTTCACTAACCTTAATCATTTAAAACATTTCCCCCCTTTCCTGAGAGCCGAGGGGTTTGTCGCAATGGAGGGTGTTTGATAATATTGACTCAACATTGGGGGACACGCTTGATTCTGTCTCTTATATACAATCTCCGAGCCCACGAGACCAGAGTGCGCGCAGCAATGCGTAACT\n +\n )8;541>AA?9F3@'.FD:->?E?D?AB56<70+)*&%)/02-2<<67;$$/$06518..1(*',)&(24')$&)16:6@@89EA=3)703,3''+0=85MG>B>@C?:;:,>$%%).))6-&'&%BD???@>=:?B;<?D>76750:>C9EE9D=@<8./1-.;>GE?A<NIDDECAGCFIC>BECA@;D7==491<E1%==A9HE<DD>D@>?@CM;)=@KHD9BABA98@A<BENCA;:6;3<=CAA@@@FI>78834.''24DA99@BI?MFFDD<>27*=<:?($4$%.11705A@MD?<8A=@7<@4C1ACCB@>GDB?8,1(8=F==:?A?FCC9A3><<=;79<C==<%?)9>?IG>='>,AF=CAJIC6AB@>0?9<68&*8K@C?467.<=5134LID564>:621=ACEH<@E:8<343/BA7C779<?AD@>9?BD23DB;889@:@=>=**342,+68:4CELD@G2C?@@BDC9)=GDHH@E@FBBGBGJEDBBA007/65$%<?FKB=B?((+EBJ>>?;00:;9;60+&&;798>>&924&ACG8>>=BC@?8:2;EA7.BB1+481254D>:5/#%\n-@f2a18031-a79d-4fd4-8200-836066a428c6\n-GTGTGTATTACTTCGTTCAGTTACGTATTGCTGAAAAAGAAACAGTGGCATAGTGATGCATAAATAAACACATTATTACTATGTAAACGTAAACAGAAATGGAGAGCAAGTTTAAACATGATAAAGATATCCATAAAAAGAAATTTTAAAGTGTTTCGAGGAACATAACTCTCAAACAACTAAAAACACTTAATGTAATATCCGGAAAAAACAACTTTTGGGAAAACATCGATATTAGACTGGAAATATTTTCATTTATGACGTAAAAAAACCCAGCAGCTCTGTTAAAGTAGCTTTAGAAATGAAGCATCTCTTTTAAATAAAGCTCATTTGGGATAGCTATTTTTACAACATGGACACATCAAAGAAATATCCATAACAATAAGAGATGAAAGGTCAAGAAGTAACACAAAAATACTCAACGATAGATAACACCTAGAAAGTCTGAATCAAGCGTGTCCCTCAATGTTGAGTCAATATTATCAAACACCCCTCCATTTACGACAAACCCTCGGCTCTCAGGAAAAGGGAATGTTTTAATGATTAAGGTTAAGTGAAGCCACAGAACGGAAAAAGAACCCCCCAGAAGAATTGATTACTATAAAAATAACACAAAAAGCACAAGAAATAAACGGAGAAATAAACAAACCCCAAAGGTAAGCTATATGAATTCAAAACCGCAGCGATAATAACCACATCAAAAACATAAACAAAGAAGCAACGATAAAAAACGTATCCAAAACTACTAACACAAAGAAAAGATGAAATAATAGGCAACCTAAAAAGAATAGATAATAGAATTAACGATATTGCAATAATAGCAACAGATTAAACAAAGAGATTTTTAGATATAGGACCTGTCTCTTATACACATCTGACGCTGCCGACGACAGAGAGTGCGCGCGCAACAATACGTAAC\n-+\n-#(%$%&%'+0..=<<HF>9CC=5D@EFEE<=1,,:F4)*9>?</2D:::=<;7><EM;D:=7;??HJC;:G<<=7@BAAAI=AGHKF09>DIHD98?2;=*,/*(*;+,9+2EBDA?/6>?:672&?=A67=&)GK6(%+19:;?B>*+**86DC@/*(&&-)2:AAC;9*',$(2?::<?FHJGA89004DF@AAFID>**7/5>GQID=A55:;950(,?3:GG@C?B<;7344&&1%%#$$.;99:97./-.2AE?F<556;<B@;934/(&)255BC@>=:<55,21=0>JI>94453759>>=878,*/?>:BEA9>?-$170,03504>@77:>=@>?IB;--''??;78?>D?.(::;/')).&&**6*:89?@A;554*6?<867*/563&&%0(,0..-A;BC==.((:@A)&$,,:8:?B??>:(((/),-*)(*$&$)1346;68<947<;<B7:=;?==5?:@=>ABD@<99%:7<>=>>::,?..2-%%//=FGFGGD@@9=C@6?6049:?6+03>AK@EHIHACG>A?DA88>;A=,++*%$+)&+16B2C=7%-5D53++;FA@G@?%:':51'+&-:9>CE>CE>BCBBA?>=<'0.00.+*8+://,+.AA<G7C7860)50)&&13445-)(-65)&'858+8<;73))%&=A8BB=B=>30%24?FCADBAC?9(+/)=EG><CAA?IQHAKN;,+,.?+,=:E?GIQMJE70,)%$//122,1147976B?KH2'@=8&127:38A<458--7@68;&9FNH3%2<IBECKD@A2--9<7:789.13033,9,,)<=@D9953,-&&(*).97;;'75;6BCRF557?:41.1=?99::;=D=;<0,,.4B25A:$&1-.0025A1;9-7,,.',(6&9?.B>@C-.;76337.2-,'\n-@435ec27b-52bf-4575-b4f5-fcb8892a768b\n-CTATATCTAAAAAATCTCTTTGTTTTGAAGTCTGTTGCTATTATTACAATATCGTTAATTCTATTATCTATTCTTTTAGGTTGCCTATTGTTTCATCTTTTCTTTTTGTGTTAGTAGTTTTTGATACGTTTTTTTATCGTTGCTTCTTTGTTTATGTTTCTTGATGTAGATTTATTATCAACTGCGTTTTAGATTCGTATAAGCTTACCTTTTTAGTTTGTTTTATTTCTCCGTTTGTTTCTTATTGCTTTTGTGTTTTTATAGTAATCAATTCTTCTGGGGGTTCTTTTTCCGTTTCTGTGTTTCACTAACCCAATCATTAAAACATTCCCTTTTCCCCCTGAGAGCCGAGGGTTTGTCGCAATGGAGGGAGTGTTTGAT\n-+\n-;4@:7360AEC=B?77E9GFJ>??H1+,-'484.67258251(4.%0)-66448883434=6</1<<=?;@?;4=L3'47?@4<=?:9:&899@?AADBPE<A@C?E:2%:705(%;@BLJ@>;,%827?SKD=B?<<<HD=679;;;83?@-$$$,/<76982.*+$+0BE@;;;5.1$$$*%%%2574(':B<5#$$$'')***76:?@HD<6;=?@3=@E93<@@::$21675$//7/.,%$(&$&&&)(2&.0??=<0,,5<90='*/'/04--',('9.4;<>HBA,-$%,=80168@D:-5/.-'$##08=??HE@A>4.$)+/2/0255,,*&%*)3-#)99B40'?HHD0-0(/..<<:<1*(%%5,?<',+*\n-@7e5675d5-2114-45cc-917e-631462362ae5\n-CTGATGTGCTTCGTTCAGTTACGTATTGCTGCGCGCACTCTCTGTCGTCGAG"..b"TCCTGTTTTCGGGGTAAAACATCTCTTT\n-+\n-(?A@=CE-3()(49))6,69>H>::/(79,%3(0:&&%*3)-0/23-5$)**+&6;<),%:CC=5?7562**'%%61>=?:;FDF@?:4>56>?3/(<;)0*02/36<<;AAABCDFE::JIF<9;5:4<:?>@-)'3'.1$&&$&,;:00C?'4=>((,@D9H?<B<>@B<;=HG=<L(=()(&7<>@=F<;CB>D?=\n-@7a97d860-e33a-4e27-b5be-c4754a2eb907\n-ATTACGAAGTATCACGTGCCCTGTTATCCAGACTTATGGCAGATTAGCTTCCCGGAGAGAAAACTGTCGAAAACAGACGGTATGAACACCGTAAGCTCCAAAGTGATCACCATTCGCTTTCATGCGCCTTGGCTATGCAGCGAGCTGAAAACGATCCTGACGCATCCTTCCTGTTTTCCCGAAGGGTAAAACATCTCTTT\n-+\n-?>A?'-$%+?'%8$03F;;;=8*57>((?3:E?E?65%,0,.,B>77?AA6<G<9C(,):0IGAD=;AEIJLC@'?3A+/68966>9?<>??:</:<-.'(%32228@;:=CGDA>78:@97424%%$'%')460((8,:;1/4////@;5;?@//5896:.+?:<ACA>@KEA@LD=C22.+01849?D<>@;A=<K5/\n-@cc6e152f-3be2-4154-95c1-265c4f77a7ac\n-TACGGAGTATCACCGTGCCTGTTATCCAGACTTATGGCAGATTAGCTTCCCGGAGAAAACTGCTCCAACAGACGGTATGAACACCGTAAGCTCCCAGAAGTGATCACCATTCGCTTTCTCATGCATAGCTATGCGGCGAGCTGAAAAGCTGATCCTGACGCATCCTTCCTGTTTTTCCCGGGAGTAAAACATCTCTTT\n-+\n-,*635,44()6,87.6'66=(@;0**E=DFCL?77-56732,/(-./1-.//17&(5EE;:)')(6,,4=8>C29;5>8832,>>459;+;;)-(+$%$*77;6CAABDB?-0*+,(&$+&**8:85=??@))?%.7+*4/+(*.'($#%,,++4;EBB:B>78=8<=@GBDCGB@5;A+1<$6:EGDAG;:>>@@?H\n-@749ba682-4ec2-4367-8bb3-51878f06992d\n-CCGGGAAAGCGGAAGGATCGTCAGGATCGTTGCTACAGCTCTGCTACATATGCATGAAAGCGAATGGTGATGCTTTGGGACTTTACGGTGTTTCATACCGTCTGTTTTCGACAGTTTTCTCTCCAGGGAAACTAATCTGCCAGCAAGTCTGGATAACAGGG\n-+\n-#$$(+('&%,$)5;:8/&(,121165++.-/$$(%)(%#%'#$$$$&&%$%(3/1<222(666:=8<47/&$&)),&'&',-/.39=B>B?=-89;=>=;;;335686E;=9499=PA@-8<%/'/*+,)$77=7/-5//.-&(()**7',./102,&,&'\n-@3d6bdcab-ef5c-4ff8-a928-23066fb4e887\n-CTGTTATCAGACTTATGGCAGATTAGCTTCCCGGAGAAGAAAACTGTCGAAAACAGACGGTATGAACACCGTAAGCTCCCAAAAGTGATCACCATTCATCATGCATAGCTATGCGGGCAGACTGCGAAAACGATCCTGACTCCTTCCTGTTTTCCGGGGTAAAACATCTCTTT\n-+\n-12551)$>,,/,,,AE??>==?6/.'''-8>B03<+,,-=CA@<A@<A98=<17581:*0<BC66@AA<>;;/09:6470999-%171+98==8<;6#'36;AC;7:689<<89%*./(&$++$-+--<-;=>55:4;*%&%*%/454@CAHA/318?G?4599=9?DEA@?E\n-@0b41ea60-ea37-43ef-a912-260c4d33a356\n-CTGTTATCCAGACTTATGGCAGATTAGCTTCCGGAGAAAACTGCTTCGAAAAACAGACGGTATGAACACCGTAAGCTCCCAAAGTGATCACCATTCGCTTTCATGCATAGCGCTATGCAGCGAGCTGAAAACGATCTGACGCATCCTTCCTGTTTTCCCGGGGTAAAACATCTCTTT\n-+\n-=B>B3+++7-2-))%&.%,3**749689:=1B0.3&)(-'$$%#%(*(((('$,)+(/+,,/48:>;62+7:)*.<;9>@DE75@?<?GFGD@DDA$$=>;>?::>-043+).2?;<(&551.;><:8G@6E@<6@7<6@1CE78BDB>?=?57I@'-G3;@B=>@F@?8<?>794-\n-@cd8d2944-a807-434b-92d5-bfa925697c85\n-GTTATCCAGACTTATGGCAGATTAGCTTCCCGGAGAGAAAACTGGCATGAAAACAGACAGGTATGAACACCGTAAACTCGGTGATCACCATTCGCTTTCATGCATAGCTATGCAGCGAGCTGAAAACGATCCTGACGCATCTCCTTCCTGTTTTCCGGGGTAAAACATCTCTTT\n-+\n-8?>>%%06/B@FEFG7=854742')9<=+3B9:<98%;<>3..-(&$*/13,*;8;1+&59ADB55<94<<:;99(&)+#&62-'('668:7855;=C><24B=>==>;B<:30=;=1;9575IF9A=E55<:@?GC;77=069<=25152/56)/,45?>@BE162;>:9DCE\n-@a492dd23-2c03-4d9b-a606-dee7268bd4fa\n-AAAGAGATGTTTTTGCGGAAAACAGGAAGGATGCGTCAGGATCGTTTTCAGCGCTATAGCTATGCATGAAAGCGAATGGTGATCCTTTGGGAGCTTACGGTGTTCATGCCAATCTGTTTTCGACAGTTTCTCTCCGAGGAAGCTAATCTGCCGCAAGTCTAGTTAAC\n-+\n-F:&-/77;:@??=8%%$(#'+()%+9.99AB?==<<7/;;4/;;B>?-+(*2%%&0-1-=5;99GD=88>9>??=IC2:B?:%%*#*-&.1**,--;A::8BID?4&&%8'(<AE3??;C@>B64>:;31&*,(5('7A04.>@CC;AA7%1-1%&-/5'%*%-+,/\n-@4a58265a-1d25-47a1-b851-e7687203734f\n-GGGAAACCCAGGAGGATCGTCAGGATCGTTTTTTCAGCTCGCTGCTATGTATGCGCTGAAAGCGAATGGTGATCACTTTGGGGAGCTTACGGTGTTCATACTGTCTGTTTTCGACAGTTTTCTCTCCGGGAAGCTAATCTGCCATAAGTCTGGATAAC\n-+\n-)**%&&%5'%)*$..5%$$,70?B?:>?GBLB<0))%&'-78>.11/$$+($&$&%***2),=88=@,1A=:3<25;<959E;.9?BB*A<CCHBBB==BCC>=B>B9?LL;AA74C>CNEDF<C%?7<D78.=>C=4=?@BB9@>>2-A14:C;..5\n-@f9a56724-1fa4-4807-bacc-f0a85587afd7\n-GAACACCGTAAGCTCCCAAAGTGATCACCATTCGCTTTTTCATGCATAGCTATGCAGCGACTGGCGATCCTGACGCATCTTCTGTTTTCC\n-+\n-414===BA;3-.21$+&')'%-20-,((+6:57(*+,139@@?A534546/24022**)$$$#&(--++-7<+6*.*(())(,-03<:(,\n-@e525f31a-e4d6-4fe6-a703-7038efa2f178\n-GCTATGCAGCGAGCTGAAAACGATCCTGACGCATCCTTCCTGTTTCCCGGGTAAAACATCTCTTT\n-+\n-;B@E::0+5511=2122@>52((''*<G=E:?A45:79$=JFCC?AJG*024<>HECB@C@?KHI\n-@eeef3ccf-2188-4382-8399-a9442bbbf40a\n-GCTATGCAGCGAGCTGAAAACGATCCTGACGCATCCTTCCTGTTTTCCCGGGGTAAAACATCTCTTT\n-+\n-@=:3335937))43378KC?<?A<>CKMEH?BC55=4C>?KDFCLC4@G363877800+7<=;3;;>\n" |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/bwa-mem-mt-genome.fa.fai --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bwa-mem-mt-genome.fa.fai Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -0,0 +1,1 @@ +gi|251831106|ref|NC_012920.1| 16569 31 70 71 |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/medaka_test.bam |
b |
Binary file test-data/medaka_test.bam has changed |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/medaka_test.bam.bai |
b |
Binary file test-data/medaka_test.bam.bai has changed |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/medaka_test.hdf |
b |
Binary file test-data/medaka_test.hdf has changed |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/ref.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fasta Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -0,0 +1,2 @@ +>NC_045512.2 +ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/ref.fasta.fai --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fasta.fai Tue Sep 01 03:08:04 2020 -0400 |
b |
@@ -0,0 +1,1 @@ +NC_045512.2 2940 13 2940 2941 |
b |
diff -r 6a87478ed985 -r 35666d44fe7a test-data/variants.vcf.gz |
b |
Binary file test-data/variants.vcf.gz has changed |