comparison mothur/lib/galaxy/datatypes/metagenomics.py @ 2:e990ac8a0f58

Migrated tool version 1.19.0 from old tool shed archive to new tool shed repository
author jjohnson
date Tue, 07 Jun 2011 17:39:06 -0400
parents fcc0778f6987
children 7bfe1f843858
comparison
equal deleted inserted replaced
1:fcc0778f6987 2:e990ac8a0f58
710 finally: 710 finally:
711 close(fh) 711 close(fh)
712 return False 712 return False
713 713
class SequenceTaxonomy(Tabular):
    """
    A tab-separated table with 2 columns:
    - SequenceName
    - Taxonomy (semicolon-separated taxonomy in descending order)
    Example:
    X56533.1 Eukaryota;Alveolata;Ciliophora;Intramacronucleata;Oligohymenophorea;Hymenostomatida;Tetrahymenina;Glaucomidae;Glaucoma;
    X97975.1 Eukaryota;Parabasalidea;Trichomonada;Trichomonadida;unclassified_Trichomonadida;
    AF052717.1 Eukaryota;Parabasalidea;
    """
    # The description above must precede file_ext to be picked up as the class docstring.
    file_ext = 'seq.taxonomy'

    def __init__(self, **kwd):
        """Initialize the Tabular base and set the two fixed column names."""
        Tabular.__init__( self, **kwd )
        self.column_names = ['name','taxonomy']

    def sniff( self, filename ):
        """
        Determines whether the file is a SequenceTaxonomy

        Up to the first 11 non-blank lines are inspected.  Each must consist
        of exactly two tab-separated fields, and the second field must be one
        or more taxa, each terminated by ';' and optionally followed by a
        parenthesised integer, e.g. 'Bacteria(100);'.

        Returns True when at least one line was inspected and all inspected
        lines matched; returns False otherwise, including when the file
        cannot be opened or read.
        """
        # Raw string so that '\d' reaches the regex engine unchanged.
        pat = r'^([^ \t\n\r\f\v;]+([(]\d+[)])?[;])+$'
        # Bound before the try so that the finally clause can never hit an
        # unbound name when open() itself fails.
        fh = None
        try:
            fh = open( filename )
            count = 0
            for line in fh:
                line = line.strip()
                if not line:
                    continue  # blank lines are ignored, not counted
                fields = line.split('\t')
                if len(fields) != 2:
                    return False
                if not re.match(pat, fields[1]):
                    return False
                count += 1
                if count > 10:
                    break
            return count > 0
        except Exception:
            # Sniffing is best effort: an unreadable file is simply "not this type".
            return False
        finally:
            if fh is not None:
                fh.close()
758
class RDPSequenceTaxonomy(SequenceTaxonomy):
    """
    A tab-separated table with 2 columns:
    - SequenceName
    - Taxonomy (semicolon-separated taxonomy in descending order, RDP requires exactly 6 levels deep)
    Example:
    AB001518.1 Bacteria;Bacteroidetes;Sphingobacteria;Sphingobacteriales;unclassified_Sphingobacteriales;
    AB001724.1 Bacteria;Cyanobacteria;Cyanobacteria;Family_II;GpIIa;
    AB001774.1 Bacteria;Chlamydiae;Chlamydiae;Chlamydiales;Chlamydiaceae;Chlamydophila;

    NOTE(review): the first two example rows list only 5 levels and would be
    rejected by sniff(), which requires exactly 6 - confirm which is intended.
    """
    # The description above must precede file_ext to be picked up as the class docstring.
    file_ext = 'rdp.taxonomy'

    def sniff( self, filename ):
        """
        Determines whether the file is a RDPSequenceTaxonomy

        Up to the first 11 non-blank lines are inspected.  Each must consist
        of exactly two tab-separated fields, and the second field must be
        exactly 6 taxa, each terminated by ';' and optionally followed by a
        parenthesised integer.

        Returns True when at least one line was inspected and all inspected
        lines matched; returns False otherwise, including when the file
        cannot be opened or read.
        """
        # Raw string so that '\d' reaches the regex engine unchanged.
        pat = r'^([^ \t\n\r\f\v;]+([(]\d+[)])?[;]){6}$'
        # Bound before the try so that the finally clause can never hit an
        # unbound name when open() itself fails.
        fh = None
        try:
            fh = open( filename )
            count = 0
            for line in fh:
                line = line.strip()
                if not line:
                    continue  # blank lines are ignored, not counted
                fields = line.split('\t')
                if len(fields) != 2:
                    return False
                if not re.match(pat, fields[1]):
                    return False
                count += 1
                if count > 10:
                    break
            return count > 0
        except Exception:
            # Sniffing is best effort: an unreadable file is simply "not this type".
            return False
        finally:
            if fh is not None:
                fh.close()
720 799
721 class ConsensusTaxonomy(Tabular): 800 class ConsensusTaxonomy(Tabular):
722 file_ext = 'cons.taxonomy' 801 file_ext = 'cons.taxonomy'
723 def __init__(self, **kwd): 802 def __init__(self, **kwd):
724 """A list of names""" 803 """A list of names"""
843 fh.close() 922 fh.close()
844 return False 923 return False
845 924
846 ## Qiime Classes 925 ## Qiime Classes
847 926
848 class MetadataMapping(Tabular): 927 class QiimeMetadataMapping(Tabular):
849 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] ) 928 MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
850 file_ext = 'mapping' 929 file_ext = 'qiimemapping'
851 930
852 def __init__(self, **kwd): 931 def __init__(self, **kwd):
853 """ 932 """
854 http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview 933 http://qiime.sourceforge.net/documentation/file_formats.html#mapping-file-overview
855 Information about the samples necessary to perform the data analysis. 934 Information about the samples necessary to perform the data analysis.
885 964
886 def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): 965 def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
887 Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) 966 Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
888 self.set_column_names(dataset) 967 self.set_column_names(dataset)
889 968
class QiimeOTU(Tabular):
    """
    OTU map: associates each OTU with its sequence IDs.

    In the example each row begins with an OTU identifier and is followed
    by the sequence IDs belonging to that OTU:
    0 FLP3FBN01C2MYD FLP3FBN01B2ALM
    1 FLP3FBN01DF6NE FLP3FBN01CKW1J FLP3FBN01CHVM4
    2 FLP3FBN01AXQ2Z
    """
    file_ext = 'qiimeotu'
978
class QiimeOTUTable(Tabular):
    """
    OTU count table with two leading '#' header lines; the second one
    carries the column names.
    Example:
    #Full OTU Counts
    #OTU ID PC.354 PC.355 PC.356 Consensus Lineage
    0 0 1 0 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales
    1 1 3 1 Root;Bacteria
    2 0 2 2 Root;Bacteria;Bacteroidetes
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimeotutable'

    def init_meta( self, dataset, copy_from=None ):
        """Delegate metadata initialisation to Tabular."""
        # The base class is referenced as bare 'Tabular' elsewhere in this file.
        Tabular.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """
        Populate metadata from the header lines only.

        NOTE(review): unlike the other Qiime datatypes in this file this does
        not call Tabular.set_meta - confirm that is intentional.
        """
        self.set_column_names(dataset)

    def set_column_names(self, dataset):
        """
        Read column names from the second line when it starts with '#OTU ID',
        and record that the dataset has 2 comment lines.

        Does nothing when the dataset has no data.
        """
        if not dataset.has_data():
            return
        dataset_fh = open( dataset.file_name )
        try:
            dataset_fh.readline()  # first line is the title comment; skip it
            line = dataset_fh.readline()
        finally:
            # Always release the handle, even if reading fails.
            dataset_fh.close()
        if line.startswith('#OTU ID'):
            dataset.metadata.column_names = line.strip().split('\t')
        dataset.metadata.comment_lines = 2
1002
class QiimeDistanceMatrix(Tabular):
    """
    Sample-by-sample distance matrix; the first line holds the sample names.
    Example:
    PC.354 PC.355 PC.356
    PC.354 0.0 3.177 1.955
    PC.355 3.177 0.0 3.444
    PC.356 1.955 3.444 0.0
    """
    # NOTE(review): no 'column_names' MetadataElement is declared here, unlike
    # the other Qiime datatypes that set it - confirm whether one is needed.
    file_ext = 'qiimedistmat'

    def init_meta( self, dataset, copy_from=None ):
        """Delegate metadata initialisation to Tabular."""
        # The base class is referenced as bare 'Tabular' elsewhere in this file.
        Tabular.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite = True, skip = None, **kwd ):
        """Populate metadata from the header line only."""
        self.set_column_names(dataset)

    def set_column_names(self, dataset):
        """
        Use the tab-separated fields of the first line as column names and
        record that the dataset has 1 comment line.

        Does nothing when the dataset has no data.
        """
        if not dataset.has_data():
            return
        dataset_fh = open( dataset.file_name )
        try:
            # first line contains the names
            line = dataset_fh.readline()
        finally:
            # Always release the handle, even if reading fails.
            dataset_fh.close()
        dataset.metadata.column_names = line.strip().split('\t')
        dataset.metadata.comment_lines = 1
1023
class QiimePCA(Tabular):
    """
    Principal Coordinate Analysis Data

    Rows are samples and columns are the principal coordinate (PC) axes;
    pairs of PCs can be plotted to see how samples relate to one another.
    The eigenvalues and the % variation explained by each PC are listed at
    the bottom of the file.
    Example:
    pc vector number 1 2 3
    PC.354 -0.309063936588 0.0398252112257 0.0744672231759
    PC.355 -0.106593922619 0.141125998277 0.0780204374172
    PC.356 -0.219869362955 0.00917241121781 0.0357281314115


    eigvals 0.480220500471 0.163567082874 0.125594470811
    % variation explained 51.6955484555 17.6079322939
    """
    file_ext = 'qiimepca'
1041
class QiimeParams(Tabular):
    """
    Qiime parameters file.
    Example:
    ###pick_otus_through_otu_table.py parameters###

    # OTU picker parameters
    pick_otus:otu_picking_method uclust
    pick_otus:clustering_algorithm furthest

    # Representative set picker parameters
    pick_rep_set:rep_set_picking_method first
    pick_rep_set:sort_by otu
    """
    file_ext = 'qiimeparams'
1055
class QiimePrefs(data.Text):
    """
    Text file of coloring preferences consumed by make_distance_histograms.py,
    make_2d_plots.py and make_3d_plots.py.
    Example:
    {
        'background_color':'black',

        'sample_coloring':
        {
            'Treatment':
            {
                'column':'Treatment',
                'colors':(('red',(0,100,100)),('blue',(240,100,100)))
            },
            'DOB':
            {
                'column':'DOB',
                'colors':(('red',(0,100,100)),('blue',(240,100,100)))
            }
        },
        'MONTE_CARLO_GROUP_DISTANCES':
        {
            'Treatment': 10,
            'DOB': 10
        }
    }
    """
    file_ext = 'qiimeprefs'
1084
class QiimeTaxaSummary(Tabular):
    """
    Per-sample summary by taxon; the header line starts with 'Taxon'.
    Example:
    Taxon PC.354 PC.355 PC.356
    Root;Bacteria;Actinobacteria 0.0 0.177 0.955
    Root;Bacteria;Firmicutes 0.177 0.0 0.444
    Root;Bacteria;Proteobacteria 0.955 0.444 0.0
    """
    MetadataElement( name="column_names", default=[], desc="Column Names", readonly=False, visible=True, no_value=[] )
    file_ext = 'qiimetaxsummary'

    def set_column_names(self, dataset):
        """
        Read column names from the first line when it starts with 'Taxon'.

        Does nothing when the dataset has no data.
        """
        if not dataset.has_data():
            return
        dataset_fh = open( dataset.file_name )
        try:
            line = dataset_fh.readline()
        finally:
            # Always release the handle, even if reading fails.
            dataset_fh.close()
        if line.startswith('Taxon'):
            dataset.metadata.column_names = line.strip().split('\t')

    def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ):
        """Run the standard Tabular metadata pass, then set column names from the header."""
        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
        self.set_column_names(dataset)
1106
# When executed as a script, run the doctests embedded in this module.
if __name__ == '__main__':
    import doctest
    import sys
    this_module = sys.modules[__name__]
    doctest.testmod(this_module)
893 1110