Mercurial > repos > brigidar > vcf_to_snp
view vcf_snp.py @ 4:866cd9ce1cbd draft
update anaconda python
author | brigidar |
---|---|
date | Mon, 02 Nov 2015 19:42:35 -0500 |
parents | ea2f686dfd4a |
children | 3a352cb57117 |
line wrap: on
line source
#!/usr/bin/env python ######################################################################################### # # # Name : vcf_snp.py # # Version : 0.2 # # Project : extract snp from vcf # # Description : Script to exctract snps # # Author : Brigida Rusconi # # Date : November 02 2015 # # # ######################################################################################### # If the position is identical to the reference it does not print the nucleotide. I have to retrieve it from the ref column. #------------------------------------------------------------------------------------------ import argparse, os, sys, csv, IPython import pandas import pdb from pandas import * #------------------------------------------------------------------------------------------ #output and input file name to give with the script parser = argparse.ArgumentParser() parser.add_argument('-o', '--output', help="snp tab") parser.add_argument('-s', '--snp_table', help="vcf") args = parser.parse_args() output_file = args.output input_file = args.snp_table #------------------------------------------------------------------------------------------ #read in file as dataframe df =read_csv(input_file,sep='\t', dtype=object) #------------------------------------------------------------------------------------------ # only columns with qbase and refbase in table #count_qbase=list(df.columns.values) #qindexes=[] #for i, v in enumerate(count_qbase): # if 'ALT' in v: # qindexes.append(i) df2=df.iloc[:,3:5] #pdb.set_trace() #------------------------------------------------------------------------------------------ ref_list=[] for i in range(0,df2.index.size): if df2.iloc[i,1]==".": ref_list.append(df2.iloc[i,0][0]) else: ref_list.append(df2.iloc[i,1][0]) #pdb.set_trace() # ##------------------------------------------------------------------------------------------ # #save file with output name for fasta -o option and removes header and index with open(output_file,'w') as output: output.write(df.columns.values[4] + '\t' + ''.join([str(i) for v,i in enumerate(ref_list)])) ##------------------------------------------------------------------------------------------