comparison mothur/lib/galaxy/datatypes/metagenomics.py @ 29:9c0cd3b92295

Fixes for metagenomics.py datatypes thanks to Peter Briggs
author Jim Johnson <jj@umn.edu>
date Tue, 28 May 2013 07:43:37 -0500
parents 49058b1f8d3f
children a90d1915a176
comparison
equal deleted inserted replaced
28:7238483c96fa 29:9c0cd3b92295
466 U68593 0.2872 0.1690 0.3361 0.2842 466 U68593 0.2872 0.1690 0.3361 0.2842
467 """ 467 """
468 try: 468 try:
469 fh = open( filename ) 469 fh = open( filename )
470 count = 0 470 count = 0
471 line = fh.readline()
472 sequence_count = int(line.strip())
471 while True: 473 while True:
472 line = fh.readline() 474 line = fh.readline()
473 line = line.strip() 475 line = line.strip()
474 if not line: 476 if not line:
475 break #EOF 477 break #EOF
476 if line: 478 if line:
477 if line[0] != '@': 479 # Split into fields
478 linePieces = line.split('\t') 480 linePieces = line.split('\t')
479 if len(linePieces) != 3: 481 # Each line should have the same number of
480 return False 482 # fields as the Python line index
481 try: 483 linePieces = line.split('\t')
482 check = float(linePieces[2]) 484 if len(linePieces) != (count + 1):
483 except ValueError: 485 return False
484 return False 486 # Distances should be floats
485 count += 1 487 try:
486 if count == 5: 488 for linePiece in linePieces[2:]:
487 return True 489 check = float(linePiece)
490 except ValueError:
491 return False
492 # Increment line counter
493 count += 1
494 # Only check first 5 lines
495 if count == 5:
496 return True
488 fh.close() 497 fh.close()
489 if count < 5 and count > 0: 498 if count < 5 and count > 0:
490 return True 499 return True
491 except: 500 except:
492 pass 501 pass
563 The first and second columns have the sequence names and the third column is the distance between those sequences. 572 The first and second columns have the sequence names and the third column is the distance between those sequences.
564 """ 573 """
565 try: 574 try:
566 fh = open( filename ) 575 fh = open( filename )
567 count = 0 576 count = 0
577 all_ints = True
568 while True: 578 while True:
569 line = fh.readline() 579 line = fh.readline()
570 line = line.strip() 580 line = line.strip()
571 if not line: 581 if not line:
572 break #EOF 582 break #EOF
575 linePieces = line.split('\t') 585 linePieces = line.split('\t')
576 if len(linePieces) != 3: 586 if len(linePieces) != 3:
577 return False 587 return False
578 try: 588 try:
579 check = float(linePieces[2]) 589 check = float(linePieces[2])
590 try:
591 # See if it's also an integer
592 check_int = int(linePieces[2])
593 except ValueError:
594 # At least one value is not an
595 # integer
596 all_ints = False
580 except ValueError: 597 except ValueError:
581 return False 598 return False
582 count += 1 599 count += 1
583 if count == 5: 600 if count == 5:
584 return True 601 if not all_ints:
602 return True
603 else:
604 return False
585 fh.close() 605 fh.close()
586 if count < 5 and count > 0: 606 if count < 5 and count > 0:
587 return True 607 if not all_ints:
608 return True
609 else:
610 return False
588 except: 611 except:
589 pass 612 pass
590 finally: 613 finally:
591 fh.close() 614 fh.close()
592 return False 615 return False
908 if not line: 931 if not line:
909 break #EOF 932 break #EOF
910 line = line.strip() 933 line = line.strip()
911 if line: 934 if line:
912 fields = line.split('\t') 935 fields = line.split('\t')
913 if 2 <= len(fields) <= 3: 936 if not (2 <= len(fields) <= 3):
914 return False 937 return False
915 if not re.match(pat,fields[1]): 938 if not re.match(pat,fields[1]):
916 return False 939 return False
940 if len(fields) == 3:
941 check = int(fields[2])
917 count += 1 942 count += 1
918 if count > 10: 943 if count > 10:
919 break 944 break
920 if count > 0: 945 if count > 0:
921 return True 946 return True
1115 break #EOF 1140 break #EOF
1116 if line: 1141 if line:
1117 fields = line.split('\t') 1142 fields = line.split('\t')
1118 if col_cnt == None: # ignore values in first line as they may be column headings 1143 if col_cnt == None: # ignore values in first line as they may be column headings
1119 col_cnt = len(fields) 1144 col_cnt = len(fields)
1145 # There should be at least 2 columns
1146 if col_cnt < 2:
1147 return False
1120 else: 1148 else:
1121 if len(fields) != col_cnt : 1149 if len(fields) != col_cnt :
1122 return False 1150 return False
1123 try: 1151 try:
1124 for i in range(1, col_cnt): 1152 for i in range(1, col_cnt):