Mercurial > repos > jjohnson > mothur_toolsuite
comparison mothur/lib/galaxy/datatypes/metagenomics.py @ 29:9c0cd3b92295
Fixes for metagenomics.py datatypes, thanks to Peter Briggs
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 28 May 2013 07:43:37 -0500 |
parents | 49058b1f8d3f |
children | a90d1915a176 |
comparison
equal
deleted
inserted
replaced
28:7238483c96fa | 29:9c0cd3b92295 |
---|---|
466 U68593 0.2872 0.1690 0.3361 0.2842 | 466 U68593 0.2872 0.1690 0.3361 0.2842 |
467 """ | 467 """ |
468 try: | 468 try: |
469 fh = open( filename ) | 469 fh = open( filename ) |
470 count = 0 | 470 count = 0 |
471 line = fh.readline() | |
472 sequence_count = int(line.strip()) | |
471 while True: | 473 while True: |
472 line = fh.readline() | 474 line = fh.readline() |
473 line = line.strip() | 475 line = line.strip() |
474 if not line: | 476 if not line: |
475 break #EOF | 477 break #EOF |
476 if line: | 478 if line: |
477 if line[0] != '@': | 479 # Split into fields |
478 linePieces = line.split('\t') | 480 linePieces = line.split('\t') |
479 if len(linePieces) != 3: | 481 # Each line should have the same number of |
480 return False | 482 # fields as the Python line index |
481 try: | 483 linePieces = line.split('\t') |
482 check = float(linePieces[2]) | 484 if len(linePieces) != (count + 1): |
483 except ValueError: | 485 return False |
484 return False | 486 # Distances should be floats |
485 count += 1 | 487 try: |
486 if count == 5: | 488 for linePiece in linePieces[2:]: |
487 return True | 489 check = float(linePiece) |
490 except ValueError: | |
491 return False | |
492 # Increment line counter | |
493 count += 1 | |
494 # Only check first 5 lines | |
495 if count == 5: | |
496 return True | |
488 fh.close() | 497 fh.close() |
489 if count < 5 and count > 0: | 498 if count < 5 and count > 0: |
490 return True | 499 return True |
491 except: | 500 except: |
492 pass | 501 pass |
563 The first and second columns have the sequence names and the third column is the distance between those sequences. | 572 The first and second columns have the sequence names and the third column is the distance between those sequences. |
564 """ | 573 """ |
565 try: | 574 try: |
566 fh = open( filename ) | 575 fh = open( filename ) |
567 count = 0 | 576 count = 0 |
577 all_ints = True | |
568 while True: | 578 while True: |
569 line = fh.readline() | 579 line = fh.readline() |
570 line = line.strip() | 580 line = line.strip() |
571 if not line: | 581 if not line: |
572 break #EOF | 582 break #EOF |
575 linePieces = line.split('\t') | 585 linePieces = line.split('\t') |
576 if len(linePieces) != 3: | 586 if len(linePieces) != 3: |
577 return False | 587 return False |
578 try: | 588 try: |
579 check = float(linePieces[2]) | 589 check = float(linePieces[2]) |
590 try: | |
591 # See if it's also an integer | |
592 check_int = int(linePieces[2]) | |
593 except ValueError: | |
594 # At least one value is not an | |
595 # integer | |
596 all_ints = False | |
580 except ValueError: | 597 except ValueError: |
581 return False | 598 return False |
582 count += 1 | 599 count += 1 |
583 if count == 5: | 600 if count == 5: |
584 return True | 601 if not all_ints: |
602 return True | |
603 else: | |
604 return False | |
585 fh.close() | 605 fh.close() |
586 if count < 5 and count > 0: | 606 if count < 5 and count > 0: |
587 return True | 607 if not all_ints: |
608 return True | |
609 else: | |
610 return False | |
588 except: | 611 except: |
589 pass | 612 pass |
590 finally: | 613 finally: |
591 fh.close() | 614 fh.close() |
592 return False | 615 return False |
908 if not line: | 931 if not line: |
909 break #EOF | 932 break #EOF |
910 line = line.strip() | 933 line = line.strip() |
911 if line: | 934 if line: |
912 fields = line.split('\t') | 935 fields = line.split('\t') |
913 if 2 <= len(fields) <= 3: | 936 if not (2 <= len(fields) <= 3): |
914 return False | 937 return False |
915 if not re.match(pat,fields[1]): | 938 if not re.match(pat,fields[1]): |
916 return False | 939 return False |
940 if len(fields) == 3: | |
941 check = int(fields[2]) | |
917 count += 1 | 942 count += 1 |
918 if count > 10: | 943 if count > 10: |
919 break | 944 break |
920 if count > 0: | 945 if count > 0: |
921 return True | 946 return True |
1115 break #EOF | 1140 break #EOF |
1116 if line: | 1141 if line: |
1117 fields = line.split('\t') | 1142 fields = line.split('\t') |
1118 if col_cnt == None: # ignore values in first line as they may be column headings | 1143 if col_cnt == None: # ignore values in first line as they may be column headings |
1119 col_cnt = len(fields) | 1144 col_cnt = len(fields) |
1145 # There should be at least 2 columns | |
1146 if col_cnt < 2: | |
1147 return False | |
1120 else: | 1148 else: |
1121 if len(fields) != col_cnt : | 1149 if len(fields) != col_cnt : |
1122 return False | 1150 return False |
1123 try: | 1151 try: |
1124 for i in range(1, col_cnt): | 1152 for i in range(1, col_cnt): |