Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/dbSplit.py @ 18:94ab73e8a190
Uploaded
| author | m-zytnicki | 
|---|---|
| date | Mon, 29 Apr 2013 03:20:15 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 17:b0e8584489e6 | 18:94ab73e8a190 | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 | |
| 4 ##@file | |
| 5 # Split the input fasta file in several output files | |
| 6 # usage: dbSplit.py [ options ] | |
| 7 # options: | |
| 8 # -h: this help | |
| 9 # -i: name of the input file (format='fasta') | |
| 10 # -n: number of sequences per output file (default=1) | |
| 11 # -d: record the output fasta files in a directory called 'batches' | |
| 12 # -s: use the sequence header if '-n 1' (otherwise 'batch_00X') | |
| 13 # -p: use a prefix for the output files (default='batch') | |
| 14 # -v: verbose (default=0/1) | |
| 15 | |
| 16 | |
| 17 import sys | |
| 18 import getopt | |
| 19 | |
| 20 from commons.core.seq.FastaUtils import FastaUtils | |
| 21 | |
| 22 | |
| 23 ## Give the list of the command-line options | |
| 24 # | |
def help():
    """Print the command-line usage summary on standard output.

    Every line goes through a single-argument ``print(...)`` call, which
    yields the same output under Python 2 (print statement applied to a
    parenthesised string) and Python 3 (print function).
    """
    usageLines = (
        "",
        "usage: dbSplit.py [ options ]",
        "options:",
        " -h: this help",
        " -i: name of the input file (format='fasta')",
        " -n: number of sequences per batch file (default=1)",
        " -d: record the output fasta files in a directory called 'batches'",
        " -s: use the sequence header if '-n 1' (otherwise 'batch_00X')",
        " -p: use a prefix for the output files (default='batch')",
        " -v: verbosity level (default=0/1/2)",
        "",
    )
    for line in usageLines:
        print(line)
| 37 | |
| 38 | |
| 39 ## Split the input fasta file in several output files | |
| 40 # | |
def main():
    """Parse the command line and hand the split over to FastaUtils.dbSplit.

    Options are read from ``sys.argv`` with ``getopt``. An unknown option,
    a non-integer value given to ``-n`` or ``-v``, or a missing ``-i``
    writes a message on stderr, prints the usage and exits with status 1.
    ``-h`` prints the usage and exits with status 0.

    Returns 0 once FastaUtils.dbSplit has returned.
    """
    inFile = ""
    nbSeqPerBatch = 1
    newDir = False
    useSeqHeader = False
    prefix = "batch"
    verbose = 0

    try:
        # Positional arguments are not used by this script.
        opts, _ = getopt.getopt( sys.argv[1:], "hi:n:dsp:v:" )
    except getopt.GetoptError as err:
        sys.stderr.write( "%s\n" % ( str(err) ) )
        help()
        sys.exit(1)

    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFile = a
        elif o in ( "-n", "-v" ):
            # Both options take an integer; report a bad value like any
            # other usage error instead of dying with a raw traceback.
            try:
                value = int(a)
            except ValueError:
                msg = "ERROR: option %s requires an integer (got '%s')" % ( o, a )
                sys.stderr.write( "%s\n" % ( msg ) )
                help()
                sys.exit(1)
            if o == "-n":
                nbSeqPerBatch = value
            else:
                verbose = value
        elif o == "-d":
            newDir = True
        elif o == "-s":
            useSeqHeader = True
        elif o == "-p":
            prefix = a

    if inFile == "":
        msg = "ERROR: missing input file (-i)"
        sys.stderr.write( "%s\n" % ( msg ) )
        help()
        sys.exit(1)

    # Script name without its leading directories, used in the banners.
    progName = sys.argv[0].split("/")[-1]

    if verbose > 0:
        print( "START %s" % ( progName ) )
        sys.stdout.flush()

    FastaUtils.dbSplit( inFile, nbSeqPerBatch, newDir, useSeqHeader, prefix, verbose )

    if verbose > 0:
        print( "END %s" % ( progName ) )
        sys.stdout.flush()

    return 0
| 89 | |
| 90 | |
if __name__ == "__main__":
    # Use main()'s return value as the process exit status instead of
    # discarding it.
    sys.exit( main() )
