changeset 6:4b0bee4d9a15 draft

Deleted selected files
author in_silico
date Tue, 12 Jun 2018 11:32:28 -0400
parents 7048ccf0ff7b
children a7e40372e56c
files cravat_convert/cravat_convert.py
diffstat 1 files changed, 0 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/cravat_convert/cravat_convert.py	Tue Jun 12 11:27:06 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-'''
-Convert a VCF format file to Cravat format file
-'''
-
-import os
-import argparse
-from vcf_converter import CravatConverter
-
-# File read/write configuration variables
-vcf_sep = '\t'
-cr_sep = '\t'
-cr_newline = '\n'
-
-# VCF Headers mapped to their index position in a row of VCF values
-vcf_mapping = {
-    'CHROM': 0,
-    'POS': 1,
-    'ID': 2,
-    'REF': 3,
-    'ALT': 4,
-    'QUAL': 5,
-    'FILTER': 6,
-    'INFO': 7,
-    'FORMAT': 8,
-    'NA00001': 9,
-    'NA00002': 10,
-    'NA00003': 11
-}
-
-
-def get_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--input',
-                            '-i',
-                            required = True,
-                            help='Input path to a VCF file for conversion',)
-    parser.add_argument('--output',
-                            '-o',
-                            default = os.path.join(os.getcwd(), "cravat_converted.txt"),
-                            help = 'Output path to write the cravat file to')
-    return parser.parse_args()
-
-
-def convert(in_path, out_path=None):
-    if not out_path:
-        base, _ = os.path.split(in_path)
-        out_path = os.path.join(base, "cravat_converted.txt")
-    
-    with open(in_path, 'r') as in_file, \
-    open(out_path, 'w') as out_file:
-
-        # cr_count will be used to generate the 'TR' field of the cravat rows (first header)
-        cr_count = 0
-        # VCF lines are always assumed to be '+' strand, as VCF doesn't specify that attribute
-        strand = '+'
-        # VCF converter. Adjusts position, reference, and alternate for Cravat formatting.
-        converter = CravatConverter()
-
-        for line in in_file:
-            if line.startswith("#"):
-                continue
-            line = line.strip().split(vcf_sep)
-            # row is dict of VCF headers mapped to corresponding values of this line
-            row = { header: line[index] for header, index in vcf_mapping.items() }
-            for alt in row["ALT"].split(","):
-                new_pos, new_ref, new_alt = converter.extract_vcf_variant(strand, row["POS"], row["REF"], alt)
-                new_pos, new_ref, new_alt = str(new_pos), str(new_ref), str(new_alt)
-                cr_line = cr_sep.join([
-                    'TR' + str(cr_count), row['CHROM'], new_pos, strand, new_ref, new_alt, row['ID']
-                ])
-                out_file.write(cr_line + cr_newline)
-                cr_count += 1
-
-
-if __name__ == "__main__":
-    cli_args = get_args()
-    convert(cli_args.input, cli_args.output)