Mercurial > repos > drosofff > blastx_to_scaffold
comparison blastx_to_scaffold.py @ 0:a2e034f1638e draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
| author | drosofff |
|---|---|
| date | Sun, 21 Jun 2015 14:40:10 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a2e034f1638e |
|---|---|
| 1 #!/usr/bin/python | |
| 2 import sys | |
| 3 import argparse | |
| 4 | |
| 5 def insert_newlines(string, every=60): | |
| 6 lines = [] | |
| 7 for i in xrange(0, len(string), every): | |
| 8 lines.append(string[i:i+every]) | |
| 9 return '\n'.join(lines) | |
| 10 | |
| 11 def Parser(): | |
| 12 the_parser = argparse.ArgumentParser( | |
| 13 description="Generate DNA scaffold from blastx alignment of Contigs") | |
| 14 the_parser.add_argument( | |
| 15 '--sequences', action="store", type=str, help="input sequence file in fasta format") | |
| 16 the_parser.add_argument( | |
| 17 '--blastx-tab', dest="blastx_tab", action="store", type=str, help="13-columns tabular blastx output") | |
| 18 the_parser.add_argument( | |
| 19 '--output', action="store", type=str, help="output file path, fasta format") | |
| 20 args = the_parser.parse_args() | |
| 21 return args | |
| 22 | |
| 23 def __main__(): | |
| 24 args = Parser() | |
| 25 protLenght = int (open (args.blastx_tab, "r").readline().split("\t")[12]) | |
| 26 BlastxOutput = open (args.blastx_tab, "r") | |
| 27 Contigs = open (args.sequences, "r") | |
| 28 ContigsDict = {} | |
| 29 protScaffold = {} | |
| 30 | |
| 31 for line in Contigs: | |
| 32 if line[0] == ">": | |
| 33 header = line[1:-1] | |
| 34 ContigsDict[header] = "" | |
| 35 else: | |
| 36 ContigsDict[header] += line[:-1] | |
| 37 | |
| 38 protScaffold = dict ( [(i,"NNN") for i in range (1, protLenght+1)] ) | |
| 39 | |
| 40 for line in BlastxOutput: | |
| 41 fields = line[:-1].split("\t") | |
| 42 queryStart = int(fields[6]) | |
| 43 queryStop = int(fields[7]) | |
| 44 subjectStart = int(fields[8]) | |
| 45 subjectStop = int(fields[9]) | |
| 46 seqHeader = fields[0] | |
| 47 sequence = ContigsDict[seqHeader] | |
| 48 for i in range (subjectStart, subjectStop): | |
| 49 del protScaffold[i] | |
| 50 protScaffold[subjectStop] = ContigsDict[seqHeader][queryStart -1: queryStop] | |
| 51 | |
| 52 finalSeqList = [] | |
| 53 for i in sorted (protScaffold): | |
| 54 finalSeqList.append(protScaffold[i]) | |
| 55 finalSequence = insert_newlines("".join(finalSeqList)) | |
| 56 | |
| 57 Out = open (args.output, "w") | |
| 58 print >> Out, ">Scaffold" | |
| 59 print >> Out, finalSequence | |
| 60 | |
| 61 BlastxOutput.close() | |
| 62 Contigs.close() | |
| 63 Out.close() | |
| 64 | |
| 65 if __name__ == "__main__": | |
| 66 __main__() |
