comparison gene_family_scaffold_loader.py @ 2:cb101ec1a0dd draft

Uploaded
author greg
date Mon, 01 Oct 2018 13:36:56 -0400
parents 6282484a52bc
children f56e20e010e2
comparison
equal deleted inserted replaced
1:794951c24c86 2:cb101ec1a0dd
103 1. Parse all of the *.list files in the same directory to populate 103 1. Parse all of the *.list files in the same directory to populate
104 self.scaffold_genes_dict. 104 self.scaffold_genes_dict.
105 """ 105 """
106 scaffold_id = os.path.basename(self.args.scaffold_path) 106 scaffold_id = os.path.basename(self.args.scaffold_path)
107 file_dir = os.path.join(self.args.scaffold_path, 'annot') 107 file_dir = os.path.join(self.args.scaffold_path, 'annot')
108 # The scaffol naming convention must follow this pattern: 108 # The scaffold naming convention must follow this pattern:
109 # <integer1>Gv<integer2>.<integer3> 109 # <integer1>Gv<integer2>.<integer3>
110 # where integer 1 is the number of genomes in the scaffold_id. For example: 110 # where integer 1 is the number of genomes in the scaffold_id. For example:
111 # 22Gv1.1 -> 22 genomes 111 # 22Gv1.1 -> 22 genomes
112 # 12Gv1.0 -> 12 genomes 112 # 12Gv1.0 -> 12 genomes
113 # 26Gv2.0 -> 26 genomes, etc. 113 # 26Gv2.0 -> 26 genomes, etc.
135 i = 0 135 i = 0
136 for i2, line in enumerate(fh): 136 for i2, line in enumerate(fh):
137 if i2 == 0: 137 if i2 == 0:
138 # Skip first line. 138 # Skip first line.
139 continue 139 continue
140 line = line.rstrip('\n')
140 num_genes = 0 141 num_genes = 0
141 num_species = 0 142 num_species = 0
142 items = line.split("\t") 143 items = line.split("\t")
143 orthogroup_id = int(items[0]) 144 orthogroup_id = int(items[0])
144 # Zero based items 1 to num_genomes consists of the 145 # Zero based items 1 to num_genomes consists of the