# HG changeset patch # User gregory-minevich # Date 1332805012 14400 # Node ID bc7cc93ef659af8908427ca943b4072e678330a7 # Parent a3873bb68495273bda8aa17db0bd5486a6436620 Deleted selected files diff -r a3873bb68495 -r bc7cc93ef659 ._checkSnpEffCandidates.py Binary file ._checkSnpEffCandidates.py has changed diff -r a3873bb68495 -r bc7cc93ef659 ._checkSnpEffCandidates.xml Binary file ._checkSnpEffCandidates.xml has changed diff -r a3873bb68495 -r bc7cc93ef659 checkSnpEffCandidates.py --- a/checkSnpEffCandidates.py Tue Mar 20 11:02:34 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -#!/usr/bin/python - -import sys -import optparse -import csv -import re - -def main(): - parser = optparse.OptionParser() - parser.add_option('-s', '--snpeff_file', dest = 'snpeff_file', action = 'store', type = 'string', default = None, help = "Path to the snpEff file") - parser.add_option('-c', '--candidate_list', dest = 'candidate_list', action = 'store', type = 'string', default = None, help = "Two column tabular list of candidate Gene ID, Type") - parser.add_option('-o', '--output', dest = 'output', action = 'store', type = 'string', default = None, help = "Output file name") - (options, args) = parser.parse_args() - - snpeff_file = options.snpeff_file - candidate_list = options.candidate_list - - candidates = parse_candidate_list(candidate_list = candidate_list) - mark_snpeff_file(snpeff_file = snpeff_file, output = options.output, candidates = candidates) - -def skip_and_write_headers(writer = None, reader = None, i_file = None): - # count headers - comment = 0 - while reader.next()[0].startswith('#'): - comment = comment + 1 - - # skip and write headers - i_file.seek(0) - for i in range(0, comment): - row = reader.next() - writer.writerow(row) - -def parse_candidate_list(candidate_list = ""): - i_file = open(candidate_list, 'rU') - reader = csv.reader(i_file, delimiter = '\t',) - - candidates = {} - for row in reader: - gene_id = row[0] - gene_type = row[1] - candidates[gene_id] = gene_type - - i_file.close() - - return candidates - -def mark_snpeff_file(snpeff_file = "", output = "", candidates = None): - i_file = open(snpeff_file, 'rU') - reader = csv.reader(i_file, delimiter = '\t') - - o_file = open(output, 'wb') - writer = csv.writer(o_file, delimiter = '\t') - - skip_and_write_headers(writer = writer, reader = reader, i_file = i_file) - - for row in reader: - gene_id = row[9] - if gene_id in candidates: - gene_type = candidates[gene_id] - row.append(gene_type) - else: - row.append('') - - writer.writerow(row) - - o_file.close() - i_file.close() - -if __name__ == "__main__": - main() diff -r a3873bb68495 -r bc7cc93ef659 checkSnpEffCandidates.xml --- a/checkSnpEffCandidates.xml Tue Mar 20 11:02:34 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ - - Marks up a snpEff output file with matches to a gene candidate list. - checkSnpEffCandidates.py -s $snpeff_file -c $candidate_list -o $output - - - - - - - - - sys - optparse - csv - - - - - - - -**What it does:** - -Indicates on a SnpEff output file which genes are found in a candidate list by comparing Gene IDs. - -For a description of the snpEff variant annotation and effect prediction tool: - -http://snpeff.sourceforge.net - ------- - -**Input:** - -The candidate list should be in a tabular format with two columns: Gene ID and Gene Description (e.g. C55B7.12 and transcription_factor). The file should contain no headers. - -Useful candidate lists (e.g. transcription factors, genes expressed in neurons, transgene silencers, chromatin factors) are available on the Hobert Lab website: - -http://biochemistry.hs.columbia.edu/labs/hobert/literature.html - - ------- - -**Citation:** - -This tool is part of the CloudMap package from the Hobert Lab. If you use this tool, please cite `Gregory Minevich, Danny Park, Richard J. Poole and Oliver Hobert CloudMap: A Cloud-based Pipeline for Analysis of Mutant Genome Sequences. (2012 In Preparation)`__ - - .. __: http://biochemistry.hs.columbia.edu/labs/hobert/literature.html - - - -