Mercurial > repos > iuc > varscan_somatic
comparison varscan.py @ 7:2657ab48e16a draft
planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit f5e4d8903b6d69b0507f6a8fc4c2533fc7a15dfa
author | iuc |
---|---|
date | Mon, 21 Jan 2019 12:05:00 -0500 |
parents | 2c66c4025db2 |
children | b79bb8b09822 |
comparison
equal
deleted
inserted
replaced
6:2c66c4025db2 | 7:2657ab48e16a |
---|---|
441 self._standardize_format_fields(common_header) | 441 self._standardize_format_fields(common_header) |
442 # take the remaining metadata from the template header produced by | 442 # take the remaining metadata from the template header produced by |
443 # VarScan | 443 # VarScan |
444 with pysam.VariantFile(varcall_template, 'r') as original_data: | 444 with pysam.VariantFile(varcall_template, 'r') as original_data: |
445 varscan_header = original_data.header | 445 varscan_header = original_data.header |
446 # don't propagate non-standard and redundant FORMAT keys written | |
447 # by VarScan | |
448 varscan_header.formats.remove_header('AD') | |
449 varscan_header.formats.remove_header('FREQ') | |
450 varscan_header.formats.remove_header('RD') | |
451 varscan_header.formats.remove_header('DP4') | |
446 for sample in varscan_header.samples: | 452 for sample in varscan_header.samples: |
447 common_header.samples.add(sample) | 453 common_header.samples.add(sample) |
448 common_header.merge(varscan_header) | 454 common_header.merge(varscan_header) |
449 # don't propagate non-standard and redundant FORMAT keys written | |
450 # by VarScan | |
451 common_header.formats.remove_header('FREQ') | |
452 common_header.formats.remove_header('RD') | |
453 common_header.formats.remove_header('DP4') | |
454 return common_header | 455 return common_header |
455 | 456 |
456 def _fix_read_gt_fields(self, read): | 457 def _fix_record_gt_fields(self, record): |
457 """Migrate non-standard genotype fields to standard ones.""" | 458 """Migrate non-standard genotype fields to standard ones.""" |
458 | 459 |
459 for gt_field in read.samples.values(): | 460 # The key issue to consider here is that we need to modify |
461 # genotype field values on a per-sample basis, but htslib | |
462 # reserves memory for the values of all samples upon the first | |
463 # modification of the field in the variant record. | |
464 # => We need to calculate all values first, then fill each | |
465 # genotype field starting with the sample with the largest value. | |
466 new_gt_fields = {'AD': [], 'ADF': [], 'ADR': []} | |
467 # store the current genotype field contents for each sample | |
468 per_sample_gts = record.samples.values() | |
469 # calculate and store the new genotype field values | |
470 for gt_field in per_sample_gts: | |
460 # generate a standard AD field by combining VarScan's | 471 # generate a standard AD field by combining VarScan's |
461 # RD and non-standard AD fields | 472 # RD and non-standard AD fields |
462 gt_field['AD'] = (gt_field['RD'], gt_field['AD'][0]) | 473 new_gt_fields['AD'].append((gt_field['RD'], gt_field['AD'][0])) |
463 # split VarScan's DP4 field into the standard fields | 474 # split VarScan's DP4 field into the standard fields |
464 # ADF and ADR | 475 # ADF and ADR |
465 gt_field['ADF'] = ( | 476 new_gt_fields['ADF'].append( |
466 int(gt_field['DP4'][0]), int(gt_field['DP4'][2]) | 477 (int(gt_field['DP4'][0]), int(gt_field['DP4'][2])) |
467 ) | 478 ) |
468 gt_field['ADR'] = ( | 479 new_gt_fields['ADR'].append( |
469 int(gt_field['DP4'][1]), int(gt_field['DP4'][3]) | 480 (int(gt_field['DP4'][1]), int(gt_field['DP4'][3])) |
470 ) | 481 ) |
482 # Modify the record's genotype fields. | |
483 # For each field, modify the sample containing the largest | |
484 # value for the field first. | |
485 # Without this precaution we could trigger: | |
486 # "bcf_update_format: Assertion `!fmt->p_free' failed." | |
487 # in vcf.c of htslib resulting in a core dump. | |
488 for field in sorted(new_gt_fields): | |
489 for new_field, sample_fields in sorted( | |
490 zip(new_gt_fields[field], per_sample_gts), | |
491 key=lambda x: max(x[0]), | |
492 reverse=True | |
493 ): | |
494 sample_fields[field] = new_field | |
471 # remove redundant fields | 495 # remove redundant fields |
472 # FREQ is easy to calculate from AD | 496 # FREQ is easy to calculate from AD |
473 del read.format['FREQ'] | 497 del record.format['FREQ'] |
474 del read.format['RD'] | 498 del record.format['RD'] |
475 del read.format['DP4'] | 499 del record.format['DP4'] |
476 | 500 |
477 def pileup_masker(self, mask): | 501 def pileup_masker(self, mask): |
478 def apply_mask_on_pileup(piled_items): | 502 def apply_mask_on_pileup(piled_items): |
479 for item, status in zip(piled_items, mask): | 503 for item, status in zip(piled_items, mask): |
480 if status: | 504 if status: |
843 snp_invcf, | 867 snp_invcf, |
844 self._indel_flagged_records(indel_invcf) | 868 self._indel_flagged_records(indel_invcf) |
845 ), | 869 ), |
846 **filter_args | 870 **filter_args |
847 ): | 871 ): |
848 self._fix_read_gt_fields(record) | 872 self._fix_record_gt_fields(record) |
849 o.write(str(record)) | 873 o.write(str(record)) |
850 try: | 874 try: |
851 os.remove(snp_f) | 875 os.remove(snp_f) |
852 except Exception: | 876 except Exception: |
853 pass | 877 pass |
869 self._add_filters_to_header(invcf.header) | 893 self._add_filters_to_header(invcf.header) |
870 self._standardize_format_fields(invcf.header) | 894 self._standardize_format_fields(invcf.header) |
871 for record in apply_filters( | 895 for record in apply_filters( |
872 invcf, **filter_args | 896 invcf, **filter_args |
873 ): | 897 ): |
874 self._fix_read_gt_fields(record) | 898 self._fix_record_gt_fields(record) |
875 o.write(str(record)) | 899 o.write(str(record)) |
876 try: | 900 try: |
877 os.remove(f) | 901 os.remove(f) |
878 except Exception: | 902 except Exception: |
879 pass | 903 pass |
890 self._add_indel_info_flag_to_header(invcf.header) | 914 self._add_indel_info_flag_to_header(invcf.header) |
891 self._standardize_format_fields(invcf.header) | 915 self._standardize_format_fields(invcf.header) |
892 for record in apply_filters( | 916 for record in apply_filters( |
893 self._indel_flagged_records(invcf), **filter_args | 917 self._indel_flagged_records(invcf), **filter_args |
894 ): | 918 ): |
895 self._fix_read_gt_fields(record) | 919 self._fix_record_gt_fields(record) |
896 o.write(str(record)) | 920 o.write(str(record)) |
897 try: | 921 try: |
898 os.remove(f) | 922 os.remove(f) |
899 except Exception: | 923 except Exception: |
900 pass | 924 pass |