Mercurial > repos > jjohnson > mothur_toolsuite
comparison mothur/lib/galaxy/datatypes/metagenomics.py @ 2:e990ac8a0f58
Migrated tool version 1.19.0 from old tool shed archive to new tool shed repository
author | jjohnson |
---|---|
date | Tue, 07 Jun 2011 17:39:06 -0400 |
parents | fcc0778f6987 |
children | 7bfe1f843858 |
comparison
equal
deleted
inserted
replaced
1:fcc0778f6987 | 2:e990ac8a0f58 |
---|---|
710 finally: | 710 finally: |
711 close(fh) | 711 close(fh) |
712 return False | 712 return False |
713 | 713 |
714 class SequenceTaxonomy(Tabular): | 714 class SequenceTaxonomy(Tabular): |
715 file_ext = 'taxonomy' | 715 file_ext = 'seq.taxonomy' |
716 def __init__(self, **kwd): | 716 """ |
717 """A list of names""" | 717 A table with 2 columns: |
718 - SequenceName | |
719 - Taxonomy (semicolon-separated taxonomy in descending order) | |
720 Example: | |
721 X56533.1 Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma; | |
722 X97975.1 Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida; | |
723 AF052717.1 Eukaryota;Parabasalidea; | |
724 """ | |
725 def __init__(self, **kwd): | |
718 Tabular.__init__( self, **kwd ) | 726 Tabular.__init__( self, **kwd ) |
719 self.column_names = ['name','taxonomy'] | 727 self.column_names = ['name','taxonomy'] |
728 | |
729 def sniff( self, filename ): | |
730 """ | |
731 Determines whether the file is a SequenceTaxonomy | |
732 """ | |
733 try: | |
734 pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$' | |
735 fh = open( filename ) | |
736 count = 0 | |
737 while True: | |
738 line = fh.readline() | |
739 if not line: | |
740 break #EOF | |
741 line = line.strip() | |
742 if line: | |
743 fields = line.split('\t') | |
744 if len(fields) != 2: | |
745 return False | |
746 if not re.match(pat,fields[1]): | |
747 return False | |
748 count += 1 | |
749 if count > 10: | |
750 break | |
751 if count > 0: | |
752 return True | |
753 except: | |
754 pass | |
755 finally: | |
756 fh.close() | |
757 return False | |
758 | |
759 class RDPSequenceTaxonomy(SequenceTaxonomy): | |
760 file_ext = 'rdp.taxonomy' | |
761 """ | |
762 A table with 2 columns: | |
763 - SequenceName | |
764 - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep) | |
765 Example: | |
766 AB001518.1 Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales; | |
767 AB001724.1 Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa; | |
768 AB001774.1 Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila; | |
769 """ | |
770 def sniff( self, filename ): | |
771 """ | |
772 Determines whether the file is a SequenceTaxonomy | |
773 """ | |
774 try: | |
775 pat = '^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$' | |
776 fh = open( filename ) | |
777 count = 0 | |
778 while True: | |
779 line = fh.readline() | |
780 if not line: | |
781 break #EOF | |
782 line = line.strip() | |
783 if line: | |
784 fields = line.split('\t') | |
785 if len(fields) != 2: | |
786 return False | |
787 if not re.match(pat,fields[1]): | |
788 return False | |
789 count += 1 | |
790 if count > 10: | |
791 break | |
792 if count > 0: | |
793 return True | |
794 except: | |
795 pass | |
796 finally: | |
797 fh.close() | |
798 return False | |
720 | 799 |
721 class ConsensusTaxonomy(Tabular): | 800 class ConsensusTaxonomy(Tabular): |
722 file_ext = 'cons.taxonomy' | 801 file_ext = 'cons.taxonomy' |
723 def __init__(self, **kwd): | 802 def __init__(self, **kwd): |
724 """A list of names""" | 803 """A list of names""" |
843 fh.close() | 922 fh.close() |
844 return False | 923 return False |
845 | 924 |
846 ## Qiime Classes | 925 ## Qiime Classes |
847 | 926 |
848 class MetadataMapping(Tabular): | 927 class QiimeMetadataMapping(Tabular): |
849 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) | 928 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) |
850 file_ext = 'mapping' | 929 file_ext = 'qiimemapping' |
851 | 930 |
852 def __init__(self, **kwd): | 931 def __init__(self, **kwd): |
853 """ | 932 """ |
854 http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview | 933 http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview |
855 Information about the samples necessary to perform the data analysis. | 934 Information about the samples necessary to perform the data analysis. |
885 | 964 |
886 def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): | 965 def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): |
887 Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) | 966 Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) |
888 self.set_column_names(dataset) | 967 self.set_column_names(dataset) |
889 | 968 |
969 class QiimeOTU(Tabular): | |
970 """ | |
971 Associates OTUs with sequence IDs | |
972 Example: | |
973 0 FLP3FBN01C2MYD FLP3FBN01B2ALM | |
974 1 FLP3FBN01DF6NE FLP3FBN01CKW1J FLP3FBN01CHVM4 | |
975 2 FLP3FBN01AXQ2Z | |
976 """ | |
977 file_ext = 'qiimeotu' | |
978 | |
979 class QiimeOTUTable(Tabular): | |
980 """ | |
981 #Full OTU Counts | |
982 #OTU ID PC.354 PC.355 PC.356 Consensus Lineage | |
983 0 0 1 0 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales | |
984 1 1 3 1 Root;Bacteria | |
985 2 0 2 2 Root;Bacteria;Bacteroidetes | |
986 """ | |
987 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) | |
988 file_ext = 'qiimeotutable' | |
989 def init_meta( self, dataset, copy_from=None ): | |
990 tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) | |
991 def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): | |
992 self.set_column_names(dataset) | |
993 def set_column_names(self, dataset): | |
994 if dataset.has_data(): | |
995 dataset_fh = open( dataset.file_name ) | |
996 line = dataset_fh.readline() | |
997 line = dataset_fh.readline() | |
998 if line.startswith('#OTU ID'): | |
999 dataset.metadata.column_names = line.strip().split('\t'); | |
1000 dataset_fh.close() | |
1001 dataset.metadata.comment_lines = 2 | |
1002 | |
1003 class QiimeDistanceMatrix(Tabular): | |
1004 """ | |
1005 PC.354 PC.355 PC.356 | |
1006 PC.354 0.0 3.177 1.955 | |
1007 PC.355 3.177 0.0 3.444 | |
1008 PC.356 1.955 3.444 0.0 | |
1009 """ | |
1010 file_ext = 'qiimedistmat' | |
1011 def init_meta( self, dataset, copy_from=None ): | |
1012 tabular.Tabular.init_meta( self, dataset, copy_from=copy_from ) | |
1013 def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): | |
1014 self.set_column_names(dataset) | |
1015 def set_column_names(self, dataset): | |
1016 if dataset.has_data(): | |
1017 dataset_fh = open( dataset.file_name ) | |
1018 line = dataset_fh.readline() | |
1019 # first line contains the names | |
1020 dataset.metadata.column_names = line.strip().split('\t'); | |
1021 dataset_fh.close() | |
1022 dataset.metadata.comment_lines = 1 | |
1023 | |
1024 class QiimePCA(Tabular): | |
1025 """ | |
1026 Principal Coordinate Analysis Data | |
1027 The principal coordinate (PC) axes (columns) for each sample (rows). | |
1028 Pairs of PCs can then be graphed to view the relationships between samples. | |
1029 The bottom of the output file contains the eigenvalues and % variation explained for each PC. | |
1030 Example: | |
1031 pc vector number 1 2 3 | |
1032 PC.354 -0.309063936588 0.0398252112257 0.0744672231759 | |
1033 PC.355 -0.106593922619 0.141125998277 0.0780204374172 | |
1034 PC.356 -0.219869362955 0.00917241121781 0.0357281314115 | |
1035 | |
1036 | |
1037 eigvals 0.480220500471 0.163567082874 0.125594470811 | |
1038 % variation explained 51.6955484555 17.6079322939 | |
1039 """ | |
1040 file_ext = 'qiimepca' | |
1041 | |
1042 class QiimeParams(Tabular): | |
1043 """ | |
1044 ###pick_otus_through_otu_table.py parameters### | |
1045 | |
1046 # OTU picker parameters | |
1047 pick_otus:otu_picking_method uclust | |
1048 pick_otus:clustering_algorithm furthest | |
1049 | |
1050 # Representative set picker parameters | |
1051 pick_rep_set:rep_set_picking_method first | |
1052 pick_rep_set:sort_by otu | |
1053 """ | |
1054 file_ext = 'qiimeparams' | |
1055 | |
1056 class QiimePrefs(data.Text): | |
1057 """ | |
1058 A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py. | |
1059 Example: | |
1060 { | |
1061 'background_color':'black', | |
1062 | |
1063 'sample_coloring': | |
1064 { | |
1065 'Treatment': | |
1066 { | |
1067 'column':'Treatment', | |
1068 'colors':(('red',(0,100,100)),('blue',(240,100,100))) | |
1069 }, | |
1070 'DOB': | |
1071 { | |
1072 'column':'DOB', | |
1073 'colors':(('red',(0,100,100)),('blue',(240,100,100))) | |
1074 } | |
1075 }, | |
1076 'MONTE_CARLO_GROUP_DISTANCES': | |
1077 { | |
1078 'Treatment': 10, | |
1079 'DOB': 10 | |
1080 } | |
1081 } | |
1082 """ | |
1083 file_ext = 'qiimeprefs' | |
1084 | |
1085 class QiimeTaxaSummary(Tabular): | |
1086 """ | |
1087 Taxon PC.354 PC.355 PC.356 | |
1088 Root;Bacteria;Actinobacteria 0.0 0.177 0.955 | |
1089 Root;Bacteria;Firmicutes 0.177 0.0 0.444 | |
1090 Root;Bacteria;Proteobacteria 0.955 0.444 0.0 | |
1091 """ | |
1092 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) | |
1093 file_ext = 'qiimetaxsummary' | |
1094 | |
1095 def set_column_names(self, dataset): | |
1096 if dataset.has_data(): | |
1097 dataset_fh = open( dataset.file_name ) | |
1098 line = dataset_fh.readline() | |
1099 if line.startswith('Taxon'): | |
1100 dataset.metadata.column_names = line.strip().split('\t'); | |
1101 dataset_fh.close() | |
1102 | |
1103 def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): | |
1104 Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) | |
1105 self.set_column_names(dataset) | |
1106 | |
890 if __name__ == '__main__': | 1107 if __name__ == '__main__': |
891 import doctest, sys | 1108 import doctest, sys |
892 doctest.testmod(sys.modules[__name__]) | 1109 doctest.testmod(sys.modules[__name__]) |
893 | 1110 |