Previous changeset 12:597407d61386 (2021-09-17) Next changeset 14:1d62240feff3 (2022-06-27) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 52289bc7b99bfa8a3bda46cb35cea98399419dab" |
modified:
convert_VCF_info_fields.py macros.xml medaka_variant.xml |
added:
test-data/basecalls.fastq.gz |
removed:
test-data/basecalls.fastq |
b |
diff -r 597407d61386 -r 3fbefde449bc convert_VCF_info_fields.py --- a/convert_VCF_info_fields.py Fri Sep 17 20:22:27 2021 +0000 +++ b/convert_VCF_info_fields.py Thu Nov 18 20:01:04 2021 +0000 |
[ |
@@ -24,10 +24,10 @@ def parseInfoField(info): - info_fields = info.split(';') + info_fields = info.split(";") info_dict = OrderedDict() for info_field in info_fields: - code, val = info_field.split('=') + code, val = info_field.split("=") info_dict[code] = val return info_dict @@ -40,7 +40,7 @@ and multiple alternate alleles with simple ref, alt allele counterparts. """ - in_vcf = open(in_vcf_filepath, 'r') + in_vcf = open(in_vcf_filepath, "r") # medaka INFO fields that do not make sense after splitting of # multi-allelic records # DP will be overwritten with the value of DPSP because medaka tools @@ -48,8 +48,8 @@ # (https://github.com/nanoporetech/medaka/issues/192). # DPS, which is as unreliable as DP, gets skipped and the code # calculates the spanning reads equivalent DPSPS instead. - to_skip = {'SC', 'SR', 'AR', 'DP', 'DPSP', 'DPS'} - struct_meta_pat = re.compile('##(.+)=<ID=([^,]+)(,.+)?>') + to_skip = {"SC", "SR", "AR", "DP", "DPSP", "DPS"} + struct_meta_pat = re.compile("##(.+)=<ID=([^,]+)(,.+)?>") header_lines = [] contig_ids = set() contig_ids_simple = set() @@ -59,8 +59,8 @@ # - redundant contig information while True: line = in_vcf.readline() - if line[:2] != '##': - assert line.startswith('#CHROM') + if line[:2] != "##": + assert line.startswith("#CHROM") break if line in header_lines: # the annotate tool may generate lines already written by @@ -69,12 +69,12 @@ match = struct_meta_pat.match(line) if match: match_type, match_id, match_misc = match.groups() - if match_type == 'INFO': - if match_id == 'DPSP': - line = line.replace('DPSP', 'DP') + if match_type == "INFO": + if match_id == "DPSP": + line = line.replace("DPSP", "DP") elif match_id in to_skip: continue - elif match_type == 'contig': + elif match_type == "contig": contig_ids.add(match_id) if not match_misc: # the annotate tools writes its own contig info, @@ -87,7 +87,7 @@ # Lets check the above assumption about each ID-only contig line # having a more complete counterpart. assert not (contig_ids_simple - contig_ids) - header_lines.insert(1, '##convert_VCF_info_fields=0.2\n') + header_lines.insert(1, "##convert_VCF_info_fields=0.2\n") header_lines += [ '##INFO=<ID=DPSPS,Number=2,Type=Integer,Description="Depth of spanning reads by strand">\n', '##INFO=<ID=AF,Number=1,Type=Float,Description="Spanning Reads Allele Frequency">\n', @@ -96,47 +96,34 @@ '##INFO=<ID=SB,Number=1,Type=Integer,Description="Phred-scaled strand bias of spanning reads at this position">\n', '##INFO=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases in spanning reads">\n', '##INFO=<ID=AS,Number=4,Type=Integer,Description="Total alignment score to ref and alt allele of spanning reads by strand (ref fwd, ref rev, alt fwd, alt rev) aligned with parasail match 5, mismatch -4, open 5, extend 3">\n', - line + line, ] - with open(out_vcf_filepath, 'w') as out_vcf: + with open(out_vcf_filepath, "w") as out_vcf: out_vcf.writelines(header_lines) for line in in_vcf: - fields = line.split('\t') + fields = line.split("\t") info_dict = parseInfoField(fields[7]) - sr_list = [int(x) for x in info_dict["SR"].split(',')] - sc_list = [int(x) for x in info_dict["SC"].split(',')] + sr_list = [int(x) for x in info_dict["SR"].split(",")] + sc_list = [int(x) for x in info_dict["SC"].split(",")] if len(sr_list) != len(sc_list): - print( - 'WARNING - SR and SC are different lengths, ' - 'skipping variant' - ) + print("WARNING - SR and SC are different lengths, " "skipping variant") print(line.strip()) # Print the line for debugging purposes continue - variant_list = fields[4].split(',') - dpsp = int(info_dict['DPSP']) + variant_list = fields[4].split(",") + dpsp = int(info_dict["DPSP"]) ref_fwd, ref_rev = 0, 1 - dpspf, dpspr = (int(x) for x in info_dict['AR'].split(',')) + dpspf, dpspr = (int(x) for x in info_dict["AR"].split(",")) for i in range(0, len(sr_list), 2): dpspf += sr_list[i] dpspr += sr_list[i + 1] for j, i in enumerate(range(2, len(sr_list), 2)): - dp4 = ( - sr_list[ref_fwd], - sr_list[ref_rev], - sr_list[i], - sr_list[i + 1] - ) + dp4 = (sr_list[ref_fwd], sr_list[ref_rev], sr_list[i], sr_list[i + 1]) dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]] _, p_val = scipy.stats.fisher_exact(dp2x2) sb = pval_to_phredqual(p_val) - as_ = ( - sc_list[ref_fwd], - sc_list[ref_rev], - sc_list[i], - sc_list[i + 1] - ) + as_ = (sc_list[ref_fwd], sc_list[ref_rev], sc_list[i], sc_list[i + 1]) info = [] for code in info_dict: @@ -145,31 +132,31 @@ val = info_dict[code] info.append("%s=%s" % (code, val)) - info.append('DP=%d' % dpsp) - info.append('DPSPS=%d,%d' % (dpspf, dpspr)) + info.append("DP=%d" % dpsp) + info.append("DPSPS=%d,%d" % (dpspf, dpspr)) if dpsp == 0: - info.append('AF=NaN') + info.append("AF=NaN") else: af = (dp4[2] + dp4[3]) / dpsp - info.append('AF=%.6f' % af) + info.append("AF=%.6f" % af) if dpspf == 0: - info.append('FAF=NaN') + info.append("FAF=NaN") else: faf = dp4[2] / dpspf - info.append('FAF=%.6f' % faf) + info.append("FAF=%.6f" % faf) if dpspr == 0: - info.append('RAF=NaN') + info.append("RAF=NaN") else: raf = dp4[3] / dpspr - info.append('RAF=%.6f' % raf) - info.append('SB=%d' % sb) - info.append('DP4=%d,%d,%d,%d' % dp4) - info.append('AS=%d,%d,%d,%d' % as_) - new_info = ';'.join(info) + info.append("RAF=%.6f" % raf) + info.append("SB=%d" % sb) + info.append("DP4=%d,%d,%d,%d" % dp4) + info.append("AS=%d,%d,%d,%d" % as_) + new_info = ";".join(info) fields[4] = variant_list[j] fields[7] = new_info - out_vcf.write('\t'.join(fields)) + out_vcf.write("\t".join(fields)) in_vcf.close() |
b |
diff -r 597407d61386 -r 3fbefde449bc macros.xml --- a/macros.xml Fri Sep 17 20:22:27 2021 +0000 +++ b/macros.xml Thu Nov 18 20:01:04 2021 +0000 |
[ |
@@ -1,10 +1,10 @@ -<?xml version="1.0"?> <macros> - <token name="@TOOL_VERSION@">1.3.2</token> + <token name="@TOOL_VERSION@">1.4.4</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">20.01</token> <xml name="bio_tools"> <xrefs> - <xref type="bio.tools">khmer</xref> + <xref type="bio.tools">medaka</xref> </xrefs> </xml> <xml name="requirements"> @@ -52,22 +52,51 @@ <param argument="@ARGUMENT@" type="integer" value="100" min="1" label="Set inference batch size"/> </xml> <xml name="model" token_argument="-m" token_label="Select model"> - <param argument="@ARGUMENT@" type="select" label="@LABEL@"> + <param argument="@ARGUMENT@" type="select" label="@LABEL@" help="For best results it is important to specify the correct model, + according to the basecaller used. Medaka models are named to indicate i) the pore type, ii) the sequencing device (MinION + or PromethION), iii) the basecaller variant, and iv) the basecaller version"> + <option value="r103_fast_g507">r103_fast_g507</option> + <option value="r103_fast_snp_g507">r103_fast_snp_g507</option> + <option value="r103_fast_variant_g507">r103_fast_variant_g507</option> + <option value="r103_hac_g507">r103_hac_g507</option> + <option value="r103_hac_snp_g507">r103_hac_snp_g507</option> + <option value="r103_hac_variant_g507">r103_hac_variant_g507</option> <option value="r103_min_high_g345">r103_min_high_g345</option> <option value="r103_min_high_g360">r103_min_high_g360</option> <option value="r103_prom_high_g360">r103_prom_high_g360</option> <option value="r103_prom_snp_g3210">r103_prom_snp_g3210</option> <option value="r103_prom_variant_g3210">r103_prom_variant_g3210</option> + <option value="r103_sup_g507">r103_sup_g507</option> + <option value="r103_sup_snp_g507">r103_sup_snp_g507</option> + <option value="r103_sup_variant_g507">r103_sup_variant_g507</option> + <option value="r104_e81_fast_g5015">r104_e81_fast_g5015</option> + <option value="r104_e81_hac_g5015">r104_e81_hac_g5015</option> + <option value="r104_e81_sup_g5015">r104_e81_sup_g5015</option> <option value="r10_min_high_g303">r10_min_high_g303</option> <option value="r10_min_high_g340">r10_min_high_g340</option> <option value="r941_min_fast_g303">r941_min_fast_g303</option> + <option value="r941_min_fast_g507">r941_min_fast_g507</option> + <option value="r941_min_fast_snp_g507">r941_min_fast_snp_g507</option> + <option value="r941_min_fast_variant_g507">r941_min_fast_variant_g507</option> + <option value="r941_min_hac_g507">r941_min_hac_g507</option> + <option value="r941_min_hac_snp_g507">r941_min_hac_snp_g507</option> + <option value="r941_min_hac_variant_g507">r941_min_hac_variant_g507</option> <option value="r941_min_high_g303">r941_min_high_g303</option> <option value="r941_min_high_g330">r941_min_high_g330</option> <option value="r941_min_high_g340_rle">r941_min_high_g340_rle</option> <option value="r941_min_high_g344">r941_min_high_g344</option> <option value="r941_min_high_g351">r941_min_high_g351</option> <option value="r941_min_high_g360" selected="true">r941_min_high_g360</option> + <option value="r941_min_sup_g507">r941_min_sup_g507</option> + <option value="r941_min_sup_snp_g507">r941_min_sup_snp_g507</option> + <option value="r941_min_sup_variant_g507">r941_min_sup_variant_g507</option> <option value="r941_prom_fast_g303">r941_prom_fast_g303</option> + <option value="r941_prom_fast_g507">r941_prom_fast_g507</option> + <option value="r941_prom_fast_snp_g507">r941_prom_fast_snp_g507</option> + <option value="r941_prom_fast_variant_g507">r941_prom_fast_variant_g507</option> + <option value="r941_prom_hac_g507">r941_prom_hac_g507</option> + <option value="r941_prom_hac_snp_g507">r941_prom_hac_snp_g507</option> + <option value="r941_prom_hac_variant_g507">r941_prom_hac_variant_g507</option> <option value="r941_prom_high_g303">r941_prom_high_g303</option> <option value="r941_prom_high_g330">r941_prom_high_g330</option> <option value="r941_prom_high_g344">r941_prom_high_g344</option> @@ -76,6 +105,9 @@ <option value="r941_prom_snp_g303">r941_prom_snp_g303</option> <option value="r941_prom_snp_g322">r941_prom_snp_g322</option> <option value="r941_prom_snp_g360">r941_prom_snp_g360</option> + <option value="r941_prom_sup_g507">r941_prom_sup_g507</option> + <option value="r941_prom_sup_snp_g507">r941_prom_sup_snp_g507</option> + <option value="r941_prom_sup_variant_g507">r941_prom_sup_variant_g507</option> <option value="r941_prom_variant_g303">r941_prom_variant_g303</option> <option value="r941_prom_variant_g322">r941_prom_variant_g322</option> <option value="r941_prom_variant_g360">r941_prom_variant_g360</option> @@ -111,6 +143,28 @@ This task is performed using neural networks applied from a pileup of individual sequencing reads against a draft assembly. It outperforms graph-based methods operating on basecalled data, and can be competitive with state-of-the-art signal-based methods, whilst being much faster. ]]></token> + + <token name="@MODELS@"><![CDATA[ + +---- + +.. class:: infomark + +**Models** + +For best results it is important to specify the correct model, -m in the above, according to the basecaller used. Allowed values can be found by running medaka tools list\_models. + +Medaka models are named to indicate i) the pore type, ii) the sequencing device (MinION or PromethION), iii) the basecaller variant, and iv) the basecaller version, with the format: + + :: + + {pore}_{device}_{caller variant}_{caller version} + +For example the model named r941_min_fast_g303 should be used with data from MinION (or GridION) R9.4.1 flowcells using the fast Guppy basecaller version 3.0.3. By contrast the model +r941_prom_hac_g303 should be used with PromethION data and the high accuracy basecaller (termed "hac" in Guppy configuration files). Where a version of Guppy has been used without an exactly corresponding medaka model, the medaka model with the highest version equal to or less than the guppy version should be selected. + + ]]></token> + <token name="@REFERENCES@"><![CDATA[ More information are available in the `manual <https://nanoporetech.github.io/medaka/index.html>`_ and `github <https://github.com/nanoporetech/medaka>`_. ]]></token> |
b |
diff -r 597407d61386 -r 3fbefde449bc medaka_variant.xml --- a/medaka_variant.xml Fri Sep 17 20:22:27 2021 +0000 +++ b/medaka_variant.xml Thu Nov 18 20:01:04 2021 +0000 |
b |
@@ -1,4 +1,4 @@ -<tool id="medaka_variant_pipeline" name="medaka variant pipeline" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> +<tool id="medaka_variant_pipeline" name="medaka variant pipeline" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>via neural networks</description> <macros> <import>macros.xml</import> @@ -91,7 +91,7 @@ <data name="out_round_1_hap_2_probs_hdf" format="h5" label="${tool.name} on ${on_string}: round_1_hap_2_probs.hdf" from_work_dir="results/round_1_hap_2_probs.hdf"> <filter>'round_1_hap_2_probs_hdf' in out</filter> </data> - <data name="out_round_1_phased.vcf" format="vcf" label="${tool.name} on ${on_string}: round_1_phased.vcf" from_work_dir="results/round_1_phased.vcf"> + <data name="out_round_1_phased_vcf" format="vcf" label="${tool.name} on ${on_string}: round_1_phased.vcf" from_work_dir="results/round_1_phased.vcf"> <filter>'round_1_phased.vcf' in out</filter> </data> <data name="out_round_1_unfiltered_vcf" format="vcf" label="${tool.name} on ${on_string}: round_1_unfiltered.vcf" from_work_dir="results/round_1_unfiltered.vcf"> @@ -138,6 +138,10 @@ The module *medaka_variant* performs a variant calling via neural networks. +---- + +.. class:: infomark + **Input** It is unlikely that the model arguments should be changed from their defaults. @@ -145,6 +149,10 @@ - reads aligned to reference (BAM), should be aligned to the reference against which to call variants - reference (FASTA) +---- + +.. class:: infomark + **Output** - round_0_hap_mixed_phased.bam @@ -160,6 +168,12 @@ - round_1.vcf - log +@MODELS@ + +---- + +.. class:: infomark + **References** @REFERENCES@ |
b |
diff -r 597407d61386 -r 3fbefde449bc test-data/basecalls.fastq --- a/test-data/basecalls.fastq Fri Sep 17 20:22:27 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b"@@ -1,500 +0,0 @@\n-@215f1e0c-27d1-4446-bca5-cb0fd6a8c054\n-TTGCAGTATCTGCGTCAGAATCGCATTCCAGCCGCAGGTGTTCAGCGGTGCGTACAATTACAGCATTATGTTAAATTTTATAATTGTCTTTAGTCATTGCGCTATGATTTGTAGGTGGAGTTGTTTCATATTTTGATATAAATTCTTTTATCTCACCAAACAGTCATCGTCAGTATCAATGGTTTTGTTCTTCATTGATATTACCTATTCTAGAATCTGATTCCTTGTACAACTAAGGTAAACTTGAACTTTGCTTTCTTTTCATTATTTGGGTAAGCGTAATGATGTAGGATCTAGGATATATCTATAATAATAATTCTTGTTCTTTTTTTATTTTATTGAATATGAATCAATACCTGTCTAAGCACACCTCTTTCTTACGCGTGAGCAATGGTTAGATGGATCTTCTGGCGGCCTTGCGCCATTCCTGTTTGTTC\n-+\n-?B4-)&'%<ABC>:883110,;*-$/+%)(.(($$#%&$$#%14+6-/=1@5@A>GE@F@<B8B..0F>?AA13EJHTIDHBDAJB@BOM908=B;=>49/(?>>?>?@?(004?C57/9<B>DEEE@@@C?=>=<912+,*'1/1396;78+&$&,15(85::C@>BA>9(,</(B:;G?B;@K=<HF?JF5**+&&0/#%$;>BCB=(4<.=91<4@ADC9:AC$=9>555=3412;6;+=>?@>2$%+839@BH=CAOHFCI<.98<-0/*$%)0**()(')&$)$474)+/,'267432&&'&''''$%&'*')>?C3=;467===95489G:./5229,$$&$(&&$%%(''')+8%/0%.-%0$')&'*=566(-(685''57./4%)44+/*$$$'42:;6($$##$$$'-0(%67112$$'$())'7;:\n-@baf1c4f7-8f02-4c92-be5f-0431ca399c18\n-GGTATTACTTCGTTCAGTTACGTATTGCTGCGCGCACTCTCTGTCGTCGGCAGCGTCAGATGTGTATAAGAGACAGCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACAGGGGAGTTCGTGCACACAGCCCGAAGCTTGGAGCGAACGACCTACGCGAACCGAGATACCTACAGCGTGAGCTATGAAAGCGCCACGCTTCCCGAAGGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCGGGGGAAACACACCTGGTATCTTTTTATAGTCCTGTCCGGGTTTCGCCACCTCTGGCAGGCGTCGATTTTTGTGATGCTCGTCAGAGCGGAATTTATGGAAACGCCTGCGGCGCTGGCTTCCTCCGATGCTGCTTTTTGCTCACATATTCTTTCCGACTTTATCCCCCGATTCTGTGGATAACCGTATTACCGCTGCAGGTGAGCTGACACGCTCGCCGCAGTCGAACGACCGAGAGCGTAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCTATGTGACATTTTCTCCTTACGCTCTGTTGTGCCGTTCGGCATCCTGTCTGAGCGTTATCTCTCTGTGCTATTTTTACTTCAAAGCGTGTCTGGATGCTGTTCTGGAGTTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGCAGCCTGTTCGTCAGCATCGGGTGCGTCCAGTTTTCGAGCAGCATTCAGGCTCTGACTTTTATGAATCCCGCCATGTTGGTACGGCTTTTCTTTCTTGTTCATCTTTTCGTTTTCTCCGTTCTGTCTGTCATCTGCGTTGTGTATGATTATATCGCGTACCACTTTTCGGCTGTTTGCTGCCGTTATTCTGCGCCGCTTGGCTTTTTGACGGGCATTTCTGTCAGACAACACTGTCACTGCCAAAAAACTGCCGTGCCTTTGTCGGTAATTCGAGCTTGCTGACAGGACAGGATGTACAATTGTTATACCGCGCATACATGCACGCTATTACAATTGCCCTGGTCAGGAGCTTTGCCCCGACACCCATGTCCAGATACGGAGCCATGTTTTGCTGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCCCCTGTTCGTCTGACGGCAGAGAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCGGTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACCCACTGACTGATGATCGGGTACTGAAAGAAGTTATGAGACTGGGAGCGTTACAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGGAGTATGCGTTTGATCGCTATTACGGGTATCACGTGCCCTGTTATCAGACTTATGGCAGATTGGCTTCCCGGAGAGAAAACTGTCGAAAACAGACGGTATGAACACCGTAAGCTCCCAAAGTGATCACCATTCGCTTTCATGCATAGCTATGCAGCGAGCTGAAAACGATCCTGACGCATCCTTCCTGTTTTCCCGGGGTAAACATCTTTTTTGCAGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTCAGCAGTGCGTACAATTACAGCATTATGTTAAATTTTATAGTGTCTTTAGTCATTTGCGCTATGATTTTGTAGGTGGAGTTGCTTCATATTTTTGATATAAAGTTCTTTTCCCATCTGCTAGAGCAGTCATCGTCAGTATCAATGGTTTGTTCTTCATTGATATGCCTATTCTAGAATCTGGTCCTTATTACAACTAAGGTAAACTTGAACTTTTGCTTTCTTTTCATTATTTGGGTAAGCGTAATCAGGATCTAAGGATGTAATAATAGATTCTTGTTCTTTTTTATTTTATTGAATAGATCGTCTATATATCTAAACACCTCTTTCTTACACGCGTGAGCAATAGTTGGAGTCTTCTATTGCGCCATTCTTGTTTTTCCTGGCATGATAAAGAAGATATTTTTGTGCTTCCCGTTAATGGCTATTTCATTGTGCGAGCTTGGTGATAAATGTTAAGCTCATTTAATGTTGAAATTATCTGATCTTTCCGTGTCAGAAAAACGATTCCGTCGGCATCGATGATTAAGAATATACTTTCTTTTCCATTAAAGACTCTATGCTTTTAAATGCTCTAAGGGTTTGTTTTAGTTTGTCTTTCCATTTTATTTTTTCAACATAAACATCATTAATTCCTTCTATTTCAAGTAGATTTAGGAAGTGAAAATCATCATCACCCTCAACAAAAAATGACTTTTTTATAATCATTTATTTTCTCGGTTTCATTTATCTCACCTCCCAGCCATTTTCAACTGAAAAGGAAAATCATCCATGTTAAATTGCATTGCTGTAGGTTTTGGGAATTGCTTGTCTCTTCCCAGTCTTATGTAAGATATATCTTTCTTTTTCATCTAATTCGTTTATCGCACATATTACATCATGGCTGTGTGTTGTTGTAAATATTTGATTATTATTGGTTTTGCTGCTTCTATCAAAGCGTTGATCATATCTTTGATTACTGAGTAATGAATTCCATTTTCTATCTCATCAATGAGAATAATTGAGTCTTTTTGAACTAAGATTGAGCTAATGAAAGACAGCGCTCTGCTTACTCCTTCACCAAGCATTGATATTTCATTTAATTCACTTAGTCCTATATCTAAAAAAATCTCTTTGTTTTGATCTGTTGCTATTATTGCAATATCGTTAATTCTATTATCTATTCTTTTAGGTTGCCTATTATTTCATCTTTCTTTTGTGTTAGTAGTTTGACCGTTTTTATCGTTGCTTCTTTGTTTATGCTTCTTGATGTGGTTATTACCGCGCTGCGGTTTTGAACTCATATAGCTTACACACTTTTTAGTTTGTTTTATTTCTCCGTTTATTTCTTGTGCTTTTTGTGTTATTTTTATAGTAATCAATTCTTCTGGGGGTTCTTTTCCGTTCTGTGTTTCACTAACCTTAATCGTTAAAACATTCCCCTTTTCCTGAGAATA"..b"=8+,$/-.$#+**'*;<=@==<..8-/0.1+,5003:><+>7=79227:678><:>99//16<==(@AGCBAE@>F/A@;BBCA?@:?FONPIBJGDDE>@<.%+,+$%)-/8)3==B>%$>=?@<IB;<>=<>D;0FB;8:=??@++.-500*'?:<80B8=<773007718..)5&&=/.01/2289,0<>8B>@?))+287%789;C?533*)(#&%-''-3;-*)(%35<5469<76:EF?<;=>87597<;3>9,&%+&3/3=996&556;=BG*3(.,<6)3).)+AA;@B*5&;>956?;;7/.10)00/,'%(>/299.)7.876.GCF;27-,=9-3--**$1,*)'/:&-0--,.35;:947873.306''*B=:>15B;GBG557><=@B99;666889;89<A?MEMI:?>?FCEIA7.?$--,+$.847'*22.*)24-'(86BA=73>BB6&*73;,$465:?)?((6-+++))$(--.,*<2>49474987=;=:;;:66537<=D@>DFP>-/&'/'258740.4*&'%&-5779,+...-&'+/1.0))4,-57?=?EGBB?8/,*%4&%08AB<<8-=DB7?10%$(+5-5:=8*)&%'%,'4AB>>&69GI?<899//:@;>1>BECA?2@%'./9GCDHAC@JJ:ADDE;D3,/7>>7125C;67;3772>;$7=A=H<77@B>E>A.<KDRPJA?><>@BAA00CAG.$:'?DDMFCDDEBB<-/:@?BFHE?<5<9+=@472<2=%+567;5A7&5=/0*++()78CCHDB>:;2EJ?>1)89?94/00?B?HKP/B88.2-7:976(+((,0:97==HEB;6..C66H://2ABFF;;3>:8C12AGH<AD81/28?A@HDGA<=BBHE?;ABE=?C',?F?B95<=64=@@AD>66;F?5:7@B?G:235>A64433/0&'(...1:;;BEDAOL?>?B@A:F=3D?>?&492+,-69)).7668@4;2,:DDEDB@80+*&.,#&($',('&/8:1/A-+,**;<96;9;><A/%6**$$%,6765A/1:=@B<=7,?ACDAC7D/446B>($DC>1.05-,++?9?A<>;17>AFHFC@A4-*-,21'&'(511.6/221$$&$%'&&&%014*??CC=<:FDCBB448@8:>65A35)565233>?FD;9DF<2<?D>?77<@F>?9*-.:84:?9832)+?+(*+25=))8//-<@@EB3+?HDB*&7-957*,/-*832?8@<=3-;908<4@96;(3&847((,&$$$<,5:/71,+*8669;<-:5;JDIE5,<+-?@1:A9@CD=.E/7A45::;8CECA@657A9>FD>AFBGD*$D-2BEA>;?4>C621?38B>C=>?::;@87%8981;<-4,2@AAA9<:52238>?A;@:ACCC3?:=0+/$&31@<,&:95)4>=018'/':9B6/)))9@85/*330:8;19E50@C<=6>4;6>?,8@@((G9:B::;CE=?9@8>ECF<FNJ7;KCCGHB+FE:>=?@>=A@EC?GBB389738//.;=A<:1)4==>HG7892AB90,)%&%#&%&2=C;@CI;0+D99=0084:5,,4;:@?FA88679:=7B@DA@B;<=:58;?>=;C?+/C).-*;3417=::=65FBDAC25+*<9?DC>BA;1)&&/6A=B?D0.5*/*(-/31,-$%/(4@<=:BCBDGBE78935%%;=78D>0.0.BEKH<@@DEH4:;HGA==:@<>AG,:A@=<:<GHF9-7(0=9<8;=?3AA+,$156>??A?ACBA32+,(/>??<;?.7&&&(*()$(59511/*000@A=BBA>?@IHNB8CA::-0235:;>?77:8;A>AA=:>=>66344CA1429B9:<@@@@A8927DCHGGOHFCA?A@EC4**+:66ADD@?CE>IHC49C;<=2(42(4685D$(?B<;:=>6?DB55AE@55@::(;?>II?CD=I@IC;IFUN:JHADG5-)88?=;<<85@D?9;@AA-/.256>DCC;:==+=:3627//080/*&.@<=5C@FE4942?D22'<?::;==20)'::;(9(%5+6454<4B;%+;A7)DC:MJLH<D8<CE7A9CCCG879@2GCD5;35.('&')*+,777$((+)17-7F69B9(&/15BA@BC==B0*-021:633$$7===E9;=D91?00;3D>777<?4,'=;@/CBG<=?;7?8)*.+*..**,,./.-CJGA@C@@?110BB<A@9327<=@@GLJ94/999662;<FEB@?@N@7:AAOH<2255),,/3C;HFPE?@BB>259;58@<>@GDC@@C9:=@A;<<>AA128@@B89>09?@+93><6;449<<9?==?>=::43.0-5%$$$$%43/,+7DGFTG=;:D?@=<8A>@<>?QGC<0,$,8:.2.06;:7>,=?==>+7465898<<NBD<<<<;<H>1%4<@KH:9>E;=@:<>IG)+33>9;9:9/12(555<AAD@:-,-:;?*/*-*%$(21.+1166=?BG*%%0+,'133.-'%%?@A9D=BDB=>=DFDEEI<8KMH5.;,,,-6A>:=79=73&$(3.68?8F>EF<ABBE?C;44HH?;33;444&;;(-<AMG@30C9896?5@:=AA94,+2481+>*@=:@B@BB>>3(((*).79:BECBED-07?ESAFDE<;+B944**+1>CFIID<;7/17>RNDEJA7:-.<??@DDHG<=41;<AB@=30,&+/%$)%$;AEGGCFFAA@HEB>-.,)*</AB<3.3*-%%++,6+))(\n-@ef1c09bf-a870-4994-85e6-196592dd4143\n-GTTGTACTTCGTTCGGTTACGTATTGCTGCGCGCGCACTCTCTGTCGTCAACGGCGTCAGATATTATTATAAGAGACAGCTCCTTCACCAAGCATTGATATTTCATTTAATTCGCCAGTCCTATATAAAAAAAATCTCTTTGTTTGATCTGTTGCTATTATTGCAATATCGTTAATTCTATTATCTATTCTTTTTAGGTTACCTATTATTTCATCTTTTCTTTTTGTGTTAGTAGTTTTGATACGTTTTATCGTTGCTTCTTCATTTATGTTTCTTGATGTGGTTATTATCGCTTGCGGTTTTGAATTCATATAGCTTACCTTTTTTAGTTTGTTTTATTTCTCCGTTTATTTCTTGTGCTTTTTTGTGTTATTTTTATAGTAATCAATTCTTCTGGGGGTTCTTTTTCCGTTCTGTGTTTCACTAACCTTAATCATTTAAAACATTTCCCCCCTTTCCTGAGAGCCGAGGGGTTTGTCGCAATGGAGGGTGTTTGATAATATTGACTCAACATTGGGGGACACGCTTGATTCTGTCTCTTATATACAATCTCCGAGCCCACGAGACCAGAGTGCGCGCAGCAATGCGTAACT\n-+\n-)8;541>AA?9F3@'.FD:->?E?D?AB56<70+)*&%)/02-2<<67;$$/$06518..1(*',)&(24')$&)16:6@@89EA=3)703,3''+0=85MG>B>@C?:;:,>$%%).))6-&'&%BD???@>=:?B;<?D>76750:>C9EE9D=@<8./1-.;>GE?A<NIDDECAGCFIC>BECA@;D7==491<E1%==A9HE<DD>D@>?@CM;)=@KHD9BABA98@A<BENCA;:6;3<=CAA@@@FI>78834.''24DA99@BI?MFFDD<>27*=<:?($4$%.11705A@MD?<8A=@7<@4C1ACCB@>GDB?8,1(8=F==:?A?FCC9A3><<=;79<C==<%?)9>?IG>='>,AF=CAJIC6AB@>0?9<68&*8K@C?467.<=5134LID564>:621=ACEH<@E:8<343/BA7C779<?AD@>9?BD23DB;889@:@=>=**342,+68:4CELD@G2C?@@BDC9)=GDHH@E@FBBGBGJEDBBA007/65$%<?FKB=B?((+EBJ>>?;00:;9;60+&&;798>>&924&ACG8>>=BC@?8:2;EA7.BB1+481254D>:5/#%\n" |
b |
diff -r 597407d61386 -r 3fbefde449bc test-data/basecalls.fastq.gz |
b |
Binary file test-data/basecalls.fastq.gz has changed |