Repository 'vcf_to_snp'
hg clone https://toolshed.g2.bx.psu.edu/repos/brigidar/vcf_to_snp

Changeset 0:75cedeb179aa (2015-11-02)
Next changeset 1:032d2c8cf8ae (2015-11-02)
Commit message:
Uploaded
added:
vcf_snp.py
b
diff -r 000000000000 -r 75cedeb179aa vcf_snp.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf_snp.py Mon Nov 02 12:45:11 2015 -0500
[
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+#########################################################################################
+# #
+# Name       : vcf_snp.py #
+# Version     : 0.1 #
+# Project     : extract snp from vcf #
+# Description : Script to extract SNPs #
+# Author      : Brigida Rusconi #
+# Date        : October 30 2015 #
+# #
+#########################################################################################
+#for replacement of a given value with NaN
+#http://stackoverflow.com/questions/18172851/deleting-dataframe-row-in-pandas-based-on-column-value
+
+# to remove any symbol and replace it with nan
+#http://stackoverflow.com/questions/875968/how-to-remove-symbols-from-a-string-with-python
+
+# for isin information
+#http://pandas.pydata.org/pandas-docs/stable/indexing.html
+
+# for selecting rows that have an indel:
+#http://stackoverflow.com/questions/14247586/python-pandas-how-to-select-rows-with-one-or-more-nulls-from-a-dataframe-without
+
+
+
+#------------------------------------------------------------------------------------------
+
+
+import argparse, os, sys, csv, IPython
+import pandas
+import pdb
+import numpy as np
+from pandas import *
+from IPython import get_ipython
+import matplotlib.pyplot as plt
+from pandas.util.testing import assert_frame_equal
+#------------------------------------------------------------------------------------------
+
+
+#output and input file name to give with the script
+parser = argparse.ArgumentParser()
+
+parser.add_argument('-o', '--output', help="snp tab")
+parser.add_argument('-s', '--snp_table', help="vcf")
+
+
+args = parser.parse_args()
+output_file = args.output
+input_file = args.snp_table
+#------------------------------------------------------------------------------------------
+
+
+#read in file as dataframe
+df =read_csv(input_file,sep='\t', dtype=object)
+df=df.set_index(['#CHROM','POS'])
+
+#fill missing values with '--'; otherwise boolean operations on the table cannot be done
+df=df.fillna('--')
+print "vcf " + str(df.index.size)
+#------------------------------------------------------------------------------------------
+
+# keep only the columns whose name contains 'ALT'
+count_qbase=list(df.columns.values)
+qindexes=[]
+for i, v in enumerate(count_qbase):
+    if 'ALT' in v:
+        qindexes.append(i)
+df2=df.iloc[:,qindexes]
+
+#------------------------------------------------------------------------------------------
+#pdb.set_trace()
+
+#------------------------------------------------------------------------------------------
+
+#save the transposed table to the file given with the -o option, tab-separated and without the header row
+with open(output_file,'w') as output:
+    df2.T.to_csv(output, sep='\t',header=False)
+#------------------------------------------------------------------------------------------
+
+
+