# HG changeset patch # User abims-sbr # Date 1549034797 18000 # Node ID eb95bf7f90ae2bb3bb11c88982a758a573ebf3f5 planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty diff -r 000000000000 -r eb95bf7f90ae CDS_search.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CDS_search.xml Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,306 @@ + + + + ORF and CDS search + + + + macros.xml + + + + + + + > list_files; + #end for + + ln -s $__tool_directory__/scripts/dico.py . && + + python $__tool_directory__/scripts/S01_find_orf_on_multiple_alignment.py + $__tool_directory__/scripts/code_universel_modified.txt + $length.min_length_seq + $nb_species_keep + list_files + > '$log' && + + python $__tool_directory__/scripts/S02_remove_too_short_bit_or_whole_sequence.py + $nb_species_keep + $methionine + $length.min_length_seq + $length.min_length_subseq + >> '$log' && + + python $__tool_directory__/scripts/S03_remove_site_with_not_enough_species_represented.py + $nb_species_keep + $length.min_length_nuc + >> '$log'; + ]]> + + + + + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + +
+ + + + + out_BESTORF in ["aa","both"] + + + + + out_BESTORF in ["nuc","both"] + + + + + out_CDS in ["aa","both"] and not methionine + + + + + out_CDS in ["nuc","both"] and not methionine + + + + + (out_CDS == "aa" and methionine) or (out_CDS == "both" and methionine) + + + + + (out_CDS == "nuc" and methionine) or (out_CDS == "both" and methionine) + + + + + out_CDS_filter in ["aa","both"] + + + + + out_CDS_filter in ["nuc","both"] + + + + + + + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + +
+ +
+ + +@HELP_AUTHORS@ + + + + + + + + + +
diff -r 000000000000 -r eb95bf7f90ae README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,14 @@ +Changelog + +**Version 2.0 - 05/07/2017** + + - NEW: Replace the zip between tools by Dataset Collection + + +**Version 1.0 - 13/04/2017** + + - Add funtional test with planemo + + - planemo test with conda dependency for python + + - Scripts renamed + symlinks to the directory 'scripts' diff -r 000000000000 -r eb95bf7f90ae macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,32 @@ + + + + python + + + +.. class:: infomark + +**Authors** Eric Fontanillas created the version 1 of this pipeline. Victor Mataigne developped version 2. + +.. class:: infomark + +**Galaxy integration** Julie Baffard and ABiMS TEAM, Roscoff Marine Station + + | Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool. + | Credits : Gildas le Corguillé, Misharl Monsoor + +--------------------------------------------------- + + + + + + Credits : ABIMS team, Roscoff Marine Station + Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool. + Version 1 : Scripts by Eric Fontanillas -- Galaxy integration by Julie Baffard + Version 2 : improvments by Victor Mataigne, Gildas le Corguillé, Misharl Monsoor + + + + diff -r 000000000000 -r eb95bf7f90ae scripts/S01_find_orf_on_multiple_alignment.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/S01_find_orf_on_multiple_alignment.py Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,318 @@ +#!/usr/bin/env python +# coding: utf8 +# Author: Eric Fontanillas +# Modification: 03/09/14 by Julie BAFFARD +# Last modification : 25/07/18 by Victor Mataigne + +# Description: Predict potential ORF on the basis of 2 criteria + 1 optional criteria + # CRITERIA 1 - Longest part of the alignment of sequence without codon stop "*", tested in the 3 potential ORF + # CRITERIA 2 - This longest part should be > 150nc or 50aa + # CRITERIA 3 - [OPTIONNAL] A codon start "M" should be present in this longuest part, before the last 50 aa + # OUTPUTs "05_CDS_aa" & "05_CDS_nuc" => NOT INCLUDE THIS CRITERIA + # OUTPUTs "06_CDS_with_M_aa" & "06_CDS_with_M_nuc" => INCLUDE THIS CRITERIA + +import string, os, time, re, zipfile, sys, argparse +from dico import dico + +def code_universel(F1): + """ Creates bash for genetic code (key : codon ; value : amino-acid) """ + bash_codeUniversel = {} + + with open(F1, "r") as file: + for line in file.readlines(): + L1 = string.split(line, " ") + length1 = len(L1) + if length1 == 3: + key = L1[0] + value = L1[2][:-1] + bash_codeUniversel[key] = value + else: + key = L1[0] + value = L1[2] + bash_codeUniversel[key] = value + + return(bash_codeUniversel) + +def multiple3(seq): + """ Tests if the sequence is a multiple of 3, and if not removes extra-bases + !! Possible to lost a codon, when I test ORF (as I will decay the ORF) """ + + m = len(seq)%3 + if m != 0 : + return seq[:-m], m + else : + return seq, m + +def detect_Methionine(seq_aa, Ortho, minimal_cds_length): + """ Detects if methionin in the aa sequence """ + + ln = len(seq_aa) + CUTOFF_Last_50aa = ln - minimal_cds_length + + # Find all indices of occurances of "M" in a string of aa + list_indices = [pos for pos, char in enumerate(seq_aa) if char == "M"] + + # If some "M" are present, find whether the first "M" found is not in the 50 last aa (indice < CUTOFF_Last_50aa) ==> in this case: maybenot a CDS + if list_indices != []: + first_M = list_indices[0] + if first_M < CUTOFF_Last_50aa: + Ortho = 1 # means orthologs found + + return(Ortho) + +def ReverseComplement2(seq): + """ Reverse complement DNA sequence """ + seq1 = 'ATCGN-TAGCN-atcgn-tagcn-' + seq_dict = { seq1[i]:seq1[i+6] for i in range(24) if i < 6 or 12<=i<=16 } + + return "".join([seq_dict[base] for base in reversed(seq)]) + +def simply_get_ORF(seq_dna, gen_code): + seq_by_codons = [seq_dna.upper().replace('T', 'U')[i:i+3] for i in range(0, len(seq_dna), 3)] + seq_by_aa = [gen_code[codon] if codon in gen_code.keys() else '?' for codon in seq_by_codons] + + return ''.join(seq_by_aa) + +def find_good_ORF_criteria_3(bash_aligned_nc_seq, bash_codeUniversel, minimal_cds_length, min_spec): + # Multiple sequence based : Based on the alignment of several sequences (orthogroup) + # Criteria 1 : Get the segment in the alignment with no codon stop + + # 1 - Get the list of aligned aa seq for the 3 ORF: + bash_of_aligned_aa_seq_3ORF = {} + bash_of_aligned_nuc_seq_3ORF = {} + BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION = [] + + for fasta_name in bash_aligned_nc_seq.keys(): + # Get sequence, chek if multiple 3, then get 6 orfs + sequence_nc = bash_aligned_nc_seq[fasta_name] + new_sequence_nc, modulo = multiple3(sequence_nc) + new_sequence_rev = ReverseComplement2(new_sequence_nc) + # For each seq of the multialignment => give the 6 ORFs (in nuc) + bash_of_aligned_nuc_seq_3ORF[fasta_name] = [new_sequence_nc, new_sequence_nc[1:-2], new_sequence_nc[2:-1], new_sequence_rev, new_sequence_rev[1:-2], new_sequence_rev[2:-1]] + + seq_prot_ORF1 = simply_get_ORF(new_sequence_nc, bash_codeUniversel) + seq_prot_ORF2 = simply_get_ORF(new_sequence_nc[1:-2], bash_codeUniversel) + seq_prot_ORF3 = simply_get_ORF(new_sequence_nc[2:-1], bash_codeUniversel) + seq_prot_ORF4 = simply_get_ORF(new_sequence_rev, bash_codeUniversel) + seq_prot_ORF5 = simply_get_ORF(new_sequence_rev[1:-2], bash_codeUniversel) + seq_prot_ORF6 = simply_get_ORF(new_sequence_rev[2:-1], bash_codeUniversel) + + # For each seq of the multialignment => give the 6 ORFs (in aa) + bash_of_aligned_aa_seq_3ORF[fasta_name] = [seq_prot_ORF1, seq_prot_ORF2, seq_prot_ORF3, seq_prot_ORF4, seq_prot_ORF5, seq_prot_ORF6] + + # 2 - Test for the best ORF (Get the longuest segment in the alignment with no codon stop ... for each ORF ... the longuest should give the ORF) + BEST_MAX = 0 + + for i in [0,1,2,3,4,5]: # Test the 6 ORFs + ORF_Aligned_aa = [] + ORF_Aligned_nuc = [] + + # 2.1 - Get the alignment of sequence for a given ORF + # Compare the 1rst ORF between all sequence => list them in ORF_Aligned_aa // them do the same for the second ORF, and them the 3rd + for fasta_name in bash_of_aligned_aa_seq_3ORF.keys(): + ORFsequence = bash_of_aligned_aa_seq_3ORF[fasta_name][i] + aa_length = len(ORFsequence) + ORF_Aligned_aa.append(ORFsequence) ### List of all sequences in the ORF nb "i" = + + n = i+1 + + for fasta_name in bash_of_aligned_nuc_seq_3ORF.keys(): + ORFsequence = bash_of_aligned_nuc_seq_3ORF[fasta_name][i] + nuc_length = len(ORFsequence) + ORF_Aligned_nuc.append(ORFsequence) # List of all sequences in the ORF nb "i" = + + # 2.2 - Get the list of sublist of positions whithout codon stop in the alignment + # For each ORF, now we have the list of sequences available (i.e. THE ALIGNMENT IN A GIVEN ORF) + # Next step is to get the longuest subsequence whithout stop + # We will explore the presence of stop "*" in each column of the alignment, and get the positions of the segments between the positions with "*" + MAX_LENGTH = 0 + LONGUEST_SEGMENT_UNSTOPPED = "" + j = 0 # Start from first position in alignment + List_of_List_subsequences = [] + List_positions_subsequence = [] + while j < aa_length: + column = [] + for seq in ORF_Aligned_aa: + column.append(seq[j]) + j = j+1 + if "*" in column: + List_of_List_subsequences.append(List_positions_subsequence) # Add previous list of positions + List_positions_subsequence = [] # Re-initialyse list of positions + else: + List_positions_subsequence.append(j) + + # 2.3 - Among all the sublists (separated by column with codon stop "*"), get the longuest one (BETTER SEGMENT for a given ORF) + LONGUEST_SUBSEQUENCE_LIST_POSITION = [] + MAX=0 + for sublist in List_of_List_subsequences: + if len(sublist) > MAX and len(sublist) > minimal_cds_length: + MAX = len(sublist) + LONGUEST_SUBSEQUENCE_LIST_POSITION = sublist + + # 2.4. - Test if the longuest subsequence start exactly at the beginning of the original sequence (i.e. means the ORF maybe truncated) + if LONGUEST_SUBSEQUENCE_LIST_POSITION != []: + if LONGUEST_SUBSEQUENCE_LIST_POSITION[0] == 0: + CDS_maybe_truncated = 1 + else: + CDS_maybe_truncated = 0 + else: + CDS_maybe_truncated = 0 + + + # 2.5 - Test if this BETTER SEGMENT for a given ORF, is the better than the one for the other ORF (GET THE BEST ORF) + # Test whether it is the better ORF + if MAX > BEST_MAX: + BEST_MAX = MAX + BEST_ORF = i+1 + BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION = LONGUEST_SUBSEQUENCE_LIST_POSITION + + + # 3 - ONCE we have this better segment (BEST CODING SEGMENT) + # ==> GET THE STARTING and ENDING POSITIONS (in aa position and in nuc position) + # And get the INDEX of the best ORF [0, 1, or 2] + if BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION != []: + pos_MIN_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[0] + pos_MIN_aa = pos_MIN_aa - 1 + pos_MAX_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[-1] + + + BESTORF_bash_of_aligned_aa_seq = {} + BESTORF_bash_of_aligned_aa_seq_CODING = {} + for fasta_name in bash_of_aligned_aa_seq_3ORF.keys(): + index_BEST_ORF = BEST_ORF-1 # cause list going from 0 to 2 in LIST_3_ORF, while the ORF nb is indexed from 1 to 3 + seq = bash_of_aligned_aa_seq_3ORF[fasta_name][index_BEST_ORF] + seq_coding = seq[pos_MIN_aa:pos_MAX_aa] + BESTORF_bash_of_aligned_aa_seq[fasta_name] = seq + BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] = seq_coding + + # 4 - Get the corresponding position (START/END of BEST CODING SEGMENT) for nucleotides alignment + pos_MIN_nuc = pos_MIN_aa * 3 + pos_MAX_nuc = pos_MAX_aa * 3 + + BESTORF_bash_aligned_nc_seq = {} + BESTORF_bash_aligned_nc_seq_CODING = {} + for fasta_name in bash_aligned_nc_seq.keys(): + seq = bash_of_aligned_nuc_seq_3ORF[fasta_name][index_BEST_ORF] + seq_coding = seq[pos_MIN_nuc:pos_MAX_nuc] + BESTORF_bash_aligned_nc_seq[fasta_name] = seq + BESTORF_bash_aligned_nc_seq_CODING[fasta_name] = seq_coding + + else: # no CDS found + BESTORF_bash_aligned_nc_seq = {} + BESTORF_bash_aligned_nc_seq_CODING = {} + BESTORF_bash_of_aligned_aa_seq = {} + BESTORF_bash_of_aligned_aa_seq_CODING ={} + + # Check whether their is a "M" or not, and if at least 1 "M" is present, that it is not in the last 50 aa + + BESTORF_bash_of_aligned_aa_seq_CDS_with_M = {} + BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = {} + + Ortho = 0 + for fasta_name in BESTORF_bash_of_aligned_aa_seq_CODING.keys(): + seq_aa = BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] + Ortho = detect_Methionine(seq_aa, Ortho, minimal_cds_length) ### DEF6 ### + + # CASE 1: A "M" is present and correctly localized (not in last 50 aa) + if Ortho == 1: + BESTORF_bash_of_aligned_aa_seq_CDS_with_M = BESTORF_bash_of_aligned_aa_seq_CODING + BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING + + # CASE 2: in case the CDS is truncated, so the "M" is maybe missing: + if Ortho == 0 and CDS_maybe_truncated == 1: + BESTORF_bash_of_aligned_aa_seq_CDS_with_M = BESTORF_bash_of_aligned_aa_seq_CODING + BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING + + # CASE 3: CDS not truncated AND no "M" found in good position (i.e. before the last 50 aa): + ## => the 2 bash "CDS_with_M" are left empty ("{}") + + return(BESTORF_bash_aligned_nc_seq, BESTORF_bash_aligned_nc_seq_CODING, BESTORF_bash_of_aligned_nuc_seq_CDS_with_M, BESTORF_bash_of_aligned_aa_seq, BESTORF_bash_of_aligned_aa_seq_CODING, BESTORF_bash_of_aligned_aa_seq_CDS_with_M) + +def write_output_file(results_dict, name_elems, path_out): + if results_dict != {}: + name_elems[3] = str(len(results_dict.keys())) + new_name = "_".join(name_elems) + + out1 = open("%s/%s" %(path_out,new_name), "w") + for fasta_name in results_dict.keys(): + seq = results_dict[fasta_name] + out1.write("%s\n" %fasta_name) + out1.write("%s\n" %seq) + out1.close() + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("codeUniversel", help="File describing the genetic code (code_universel_modified.txt") + parser.add_argument("min_cds_len", help="Minmal length of a CDS (in amino-acids)", type=int) + parser.add_argument("min_spec", help="Minimal number of species per alignment") + parser.add_argument("list_files", help="File with all input files names") + args = parser.parse_args() + + minimal_cds_length = int(args.min_cds_len) # in aa number + bash_codeUniversel = code_universel(args.codeUniversel) + minimum_species = int(args.min_spec) + + # Inputs from file containing list of species + list_files = [] + with open(args.list_files, 'r') as f: + for line in f.readlines(): + list_files.append(line.strip('\n')) + + # Directories for results + dirs = ["04_BEST_ORF_nuc", "04_BEST_ORF_aa", "05_CDS_nuc", "05_CDS_aa", "06_CDS_with_M_nuc", "06_CDS_with_M_aa"] + for directory in dirs: + os.mkdir(directory) + + count_file_processed, count_file_with_CDS, count_file_without_CDS, count_file_with_CDS_plus_M = 0, 0, 0, 0 + count_file_with_cds_and_enought_species, count_file_with_cds_M_and_enought_species = 0, 0 + + # ! : Currently, files are named "Orthogroup_x_y_sequences.fasta, where x is the number of the orthogroup (not important, juste here to make a distinct name), + # and y is the number of sequences/species in the group. These files are outputs of blastalign, where species can be removed. y is then modified. + name_elems = ["orthogroup", "0", "with", "0", "species.fasta"] + + # by fixing the counter here, there will be some "holes" in the outputs directories (missing numbers), but the groups between directories will correspond + #n0 = 0 + + for file in list_files: + #n0 += 1 + + count_file_processed = count_file_processed + 1 + nb_gp = file.split('_')[1] # Keep trace of the orthogroup number + fasta_file_path = "./%s" %file + bash_fasta = dico(fasta_file_path) + BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel, minimal_cds_length, minimum_species) + + name_elems[1] = nb_gp + + # Update counts and write group in corresponding output directory + if BESTORF_nuc != {}: + count_file_with_CDS += 1 + if len(BESTORF_nuc.keys()) >= minimum_species : + count_file_with_cds_and_enought_species += 1 + write_output_file(BESTORF_nuc, name_elems, dirs[0]) # OUTPUT BESTORF_nuc + write_output_file(BESTORF_aa, name_elems, dirs[1]) # The most interesting + else: + count_file_without_CDS += 1 + + if BESTORF_nuc_CODING != {} and len(BESTORF_nuc_CODING.keys()) >= minimum_species: + write_output_file(BESTORF_nuc_CODING, name_elems, dirs[2]) + write_output_file(BESTORF_aa_CODING, name_elems, dirs[3]) + + if BESTORF_nuc_CDS_with_M != {}: + count_file_with_CDS_plus_M += 1 + if len(BESTORF_nuc_CDS_with_M.keys()) >= minimum_species : + count_file_with_cds_M_and_enought_species += 1 + write_output_file(BESTORF_nuc_CDS_with_M, name_elems, dirs[4]) + write_output_file(BESTORF_aa_CDS_with_M, name_elems, dirs[5]) + + print "*************** CDS detection ***************" + print "\nFiles processed: %d" %count_file_processed + print "\tFiles with CDS: %d" %count_file_with_CDS + print "\tFiles wth CDS and more than %s species: %d" %(minimum_species, count_file_with_cds_and_enought_species) + print "\t\tFiles with CDS plus M (codon start): %d" %count_file_with_CDS_plus_M + print "\t\tFiles with CDS plus M (codon start) and more than %s species: %d" %(minimum_species,count_file_with_cds_M_and_enought_species) + print "\tFiles without CDS: %d \n" %count_file_without_CDS + print "" + +if __name__ == '__main__': + main() diff -r 000000000000 -r eb95bf7f90ae scripts/S02_remove_too_short_bit_or_whole_sequence.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/S02_remove_too_short_bit_or_whole_sequence.py Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# coding: utf8 +## Author: Eric Fontanillas +## Modification: 03/09/14 by Julie BAFFARD +## Last modification : 05/03/18 by Victor Mataigne + +## Description : find and remove indels + +################### +###### DEF 9 ###### +################### +def detect_short_indel(seq,MAX_LENGTH_SMALL_INDEL): + ## 1 ## Built the list of sublist of consecutive gap position + LIST = [] + sublist=[] + ln = len(seq) + i=0 + while i < ln: + if seq[i] == "-": + sublist.append(i) ## save gaps in sublist until a aa is found => else: + else: + LIST.append(sublist) ## save the list of gap + sublist = [] ## create new list of gap + i = i+1 + ## if gap at the end: add the last "sublist of gap" (not done in previous loop, at it add sublist (of gaps) only when in find aa, but if gap at the end, no aa after are present, so cannot add this last sublist to the LISt of gaps + if sublist != []: + LIST.append(sublist) + + ## 2 ## keep only the records of the small indel ( MIN_LENGTH_BIT_OF_SEQUENCE_aa: + LIST_sublist_aa.append(element) + + ## 4.3 ## [FILTER 3] : Remove all the sequence if the total length of all subsequences < "MIN_LENGTH_ALL_aa") + seq_all = "" + for bit_of_sequence in LIST_sublist_aa: + seq_all = seq_all + bit_of_sequence + + if len(seq_all) < MIN_LENGTH_ALL_aa: + LIST_sublist_aa = [] + + ## 4.4 ## [FILTER 4] : Detect sublist position in the original sequence, and recreate the filtered sequence from these positions: + seq_gap = "-" * len(seq) ## 4.4.1 ## generate a sequence with only gaps inside + seq_gap_nuc = "-" * len(seq_nuc) + + for subsequence in LIST_sublist_aa: + ## aa + START = string.find(seq, subsequence) + END = START + len(subsequence) + seq_gap = seq_gap[:START] + seq[START:END] + seq_gap[END:] ## 4.4.2 ## and then replace the correponding gaps by coding subsequence in the sequence + ## nuc + START_nuc = START*3 + END_nuc = END*3 + seq_gap_nuc = seq_gap_nuc[:START_nuc] + seq_nuc[START_nuc:END_nuc] + seq_gap_nuc[END_nuc:] + + ## 4.5 ## Save new sequence in bash if not empty + seq_empty_test = string.replace(seq_gap, "-", "") + if seq_empty_test != "": + new_bash_aa[fasta_name] = seq_gap + + seq_empty_test = string.replace(seq_gap_nuc, "-", "") + if seq_empty_test != "": + new_bash_nuc[fasta_name] = seq_gap_nuc + + # 4.6 ## Correct the nb of sequence in the output name, if necessary + n0 += 1 + name_elems[1] = file.split('_')[1] + #name_elems[1] = str(n0) + name_elems[3] = str(len(new_bash_nuc.keys())) + new_name = "_".join(name_elems) + dico_dico[new_name] = [new_bash_aa, new_bash_nuc] + list_new_file.append(new_name) + +## [FILTER 6]: print output only if at least "MIN_SPECIES_NB" species remaining in the alignment +for name in list_new_file : + dicoo = dico_dico[name] + dico_aa = dicoo[0] + dico_nuc = dicoo[1] + sp_nbre = len(dico_aa.keys()) + + if sp_nbre >= MIN_SPECIES_NB : + file_OUTaa = open("%s/%s" %(path_OUT1, name), "w") + file_OUTnuc = open("%s/%s" %(path_OUT2, name), "w") + + for fasta_name in dico_aa.keys() : + seq_aa = dico_aa[fasta_name] + file_OUTaa.write("%s\n" %fasta_name) + file_OUTaa.write("%s\n" %seq_aa) + for fasta_name in dico_nuc.keys() : + seq_nuc = dico_nuc[fasta_name] + file_OUTnuc.write("%s\n" %fasta_name) + file_OUTnuc.write("%s\n" %seq_nuc) + + file_OUTaa.close() + file_OUTnuc.close() + + else: + e+=1 + +###Print +if sys.argv[2] == "oui" : + print "\nIn locus with CDS considering Methionine : \n" +else : + print "\nIn locus with CDS regardless of the Methionine : \n" + +print "\nTotal number of locus recorded = %d" %n0 \ No newline at end of file diff -r 000000000000 -r eb95bf7f90ae scripts/S03_remove_site_with_not_enough_species_represented.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/S03_remove_site_with_not_enough_species_represented.py Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,191 @@ +#!/usr/bin/env python +# coding: utf8 +## Author: Eric Fontanillas +## Modification: 03/09/14 by Julie BAFFARD +## Last modification : 05/03/18 by Victor Mataigne + +## Description : find and remove indels + +#################### +###### DEF 2 ####### +#################### +def remove_position_with_too_much_missing_data(bash_aa, bash_nuc, MIN_SPECIES_NB): + + ## 1 ## Get alignment length + fasta_name0 = bash_aa.keys()[0] + ln_aa = len(bash_aa[fasta_name0]) + + ln_nuc = len(bash_nuc[fasta_name0]) + + + ## 2 ## Get positions keeped in aa alignment + LIST_POSITION_KEEPED_aa = [] + i=0 + while i < ln_aa: + site = [] + for fasta_name in bash_aa.keys(): + pos = bash_aa[fasta_name][i] + + if pos != "-" and pos != "?" and pos != "X": + site.append(pos) + if len(site) >= MIN_SPECIES_NB: + LIST_POSITION_KEEPED_aa.append(i) + i = i+1 + + ## 3 ## Get positions keeped in nuc alignment + LIST_POSITION_KEEPED_nuc = [] + for position in LIST_POSITION_KEEPED_aa: + position1 = position*3 + position2 = position*3 + 1 + position3 = position*3 + 2 + LIST_POSITION_KEEPED_nuc.append(position1) + LIST_POSITION_KEEPED_nuc.append(position2) + LIST_POSITION_KEEPED_nuc.append(position3) + + ## 4 ## Create entries for "filtered_bash" for aa & nuc + filtered_bash_aa = {} + filtered_bash_nuc = {} + for fasta_name in bash_aa.keys(): + filtered_bash_aa[fasta_name] = "" + for fasta_name in bash_nuc.keys(): + filtered_bash_nuc[fasta_name] = "" + + ## 5 ## Write "filtered_bash" for aa + j=0 + while j < ln_aa: + for fasta_name in bash_aa.keys(): + seq=filtered_bash_aa[fasta_name] + pos=bash_aa[fasta_name][j] + + if j in LIST_POSITION_KEEPED_aa: + seq = seq + pos + filtered_bash_aa[fasta_name] = seq + j = j + 1 + + ## 6 ## Remove empty sequence + for name in filtered_bash_aa.keys(): + seq = filtered_bash_aa[name] + if seq == '': + del filtered_bash_aa[name] + + + ## 7 ## Write "filtered_bash" for nuc + j=0 + while j < ln_nuc: + for fasta_name in bash_nuc.keys(): + seq=filtered_bash_nuc[fasta_name] + #print seq + pos=bash_nuc[fasta_name][j] + + if j in LIST_POSITION_KEEPED_nuc: + seq = seq + pos + filtered_bash_nuc[fasta_name] = seq + j = j + 1 + + ## 8 ## Remove empty sequence + for name in filtered_bash_nuc.keys(): + seq = filtered_bash_nuc[name] + if seq == '': + del filtered_bash_nuc[name] + + return(filtered_bash_aa, filtered_bash_nuc) +#################################### + + +####################### +##### RUN RUN RUN ##### +####################### +import string, os, time, re, sys +from dico import dico + +### 0 ### PARAMETERS +MIN_SPECIES_NB = int(sys.argv[1]) +MIN_LENGTH_FINAL_ALIGNMENT_NUC = int(sys.argv[2]) +n0 = 0 +bad = 0 +good = 0 +list_new_file = [] +dicoco = {} +list_file = [] +name_elems = ["orthogroup", "0", "with", "0", "species.fasta"] + +### 1 ### IN +path_IN1 = "./07_CDS_aa/" +L_IN1 = os.listdir(path_IN1) +lenght = len(L_IN1) +path_IN2 = "./07_CDS_nuc/" +L_IN2 = os.listdir(path_IN2) + +## 2 ## OUT +os.mkdir("08_CDS_aa_MINIMUM_MISSING_SEQUENCES") +path_OUT1 = "08_CDS_aa_MINIMUM_MISSING_SEQUENCES" +os.mkdir("08_CDS_nuc_MINIMUM_MISSING_SEQUENCES") +path_OUT2 = "08_CDS_nuc_MINIMUM_MISSING_SEQUENCES" + + +for file in L_IN1: + file_INaa = "%s/%s" %(path_IN1, file) + file_INnuc = "%s/%s" %(path_IN2, file) + + dico_aa = dico(file_INaa) ### DEF 1 ### + dico_nuc = dico(file_INnuc) ### DEF 1 ### + + if len(dico_aa) < MIN_SPECIES_NB : + list_file.append(file) + +if list_file == lenght : + MIN_SPECIES_NB == MIN_SPECIES_NB - 1 + + +for file in L_IN1 : + file_INaa = "%s/%s" %(path_IN1, file) + file_INnuc = "%s/%s" %(path_IN2, file) + + dico_aa = dico(file_INaa) ### DEF 1 ### + dico_nuc = dico(file_INnuc) ### DEF 1 ### + + ## 4.1 ## REMOVE POSITIONS WITH TOO MUCH MISSING DATA (i.e. not enough taxa represented at each position in the alignment) + filtered_bash_aa, filtered_bash_nuc = remove_position_with_too_much_missing_data(dico_aa, dico_nuc, MIN_SPECIES_NB) ### DEF 2 ### + + k = filtered_bash_nuc.keys() + new_leng_nuc = 0 + if k != []: + k0 = k[0] + seq0 = filtered_bash_nuc[k0] + new_leng_nuc = len(seq0) + + ## 4.3 ## Change file name for output, depending the number of species remaining in the alignment + n0+=1 + #name_elems[1] = str(n0) + name_elems[1] = file.split('_')[1] + name_elems[3] = str(len(filtered_bash_aa.keys())) + new_name = "_".join(name_elems) + + ## 4.5 ## Write filtered alignment in OUTPUTs + ## aa + if filtered_bash_aa != {} and new_leng_nuc >= MIN_LENGTH_FINAL_ALIGNMENT_NUC: + OUTaa=open("%s/%s" %(path_OUT1, new_name), "w") + for fasta_name in filtered_bash_aa.keys(): + seq_aa = filtered_bash_aa[fasta_name] + OUTaa.write("%s\n" %fasta_name) + OUTaa.write("%s\n" %seq_aa) + OUTaa.close() + # nuc + if filtered_bash_nuc != {} and new_leng_nuc >= MIN_LENGTH_FINAL_ALIGNMENT_NUC: + good+=1 + OUTnuc=open("%s/%s" %(path_OUT2, new_name), "w") + for fasta_name in filtered_bash_nuc.keys(): + seq_nuc = filtered_bash_nuc[fasta_name] + OUTnuc.write("%s\n" %fasta_name) + OUTnuc.write("%s\n" %seq_nuc) + OUTnuc.close() + else: + bad+=1 + + +## 5 ## Print +print "*************** 2nd Filter : removal of the indel ***************" +print "\nTotal number of locus recorded = %d" %n0 +print "\tTotal number of locus with no indels (SAVED) = %d" %good +print "\tTotal number of locus, when removing indel, wich are empty (EXCLUDED) = %d" %bad +print "" \ No newline at end of file diff -r 000000000000 -r eb95bf7f90ae scripts/code_universel_modified.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/code_universel_modified.txt Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,64 @@ +UUU Phe F +UCU Ser S +UAU Tyr Y +UGU Cys C +UUC Phe F +UCC Ser S +UAC Tyr Y +UGC Cys C +UUA Leu L +UCA Ser S +UAA Stop * +UGA Stop * +UUG Leu L +UCG Ser S +UAG Stop * +UGG Trp W +CUU Leu L +CCU Pro P +CAU His H +CGU Arg R +CUC Leu L +CCC Pro P +CAC His H +CGC Arg R +CUA Leu L +CCA Pro P +CAA Gln Q +CGA Arg R +CUG Leu L +CCG Pro P +CAG Gln Q +CGG Arg R +AUU Ile I +ACU Thr T +AAU Asn N +AGU Ser S +AUC Ile I +ACC Thr T +AAC Asn N +AGC Ser S +AUA Ile I +ACA Thr T +AAA Lys K +AGA Arg R +AUG Met M +ACG Thr T +AAG Lys K +AGG Arg R +GUU Val V +GCU Ala A +GAU Asp D +GGU Gly G +GUC Val V +GCC Ala A +GAC Asp D +GGC Gly G +GUA Val V +GCA Ala A +GAA Glu E +GGA Gly G +GUG Val V +GCG Ala A +GAG Glu E +GGG Gly G diff -r 000000000000 -r eb95bf7f90ae scripts/dico.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/dico.py Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,13 @@ +import string, itertools + +def dico(F1): + dicoco = {} + with open(F1, "r") as file: + for name, query in itertools.izip_longest(*[file]*2): + if name[0] == ">": + fasta_name_query = name[:-1] + Sn = string.split(fasta_name_query, "||") + fasta_name_query = Sn[0] + fasta_seq_query = query[:-1] + dicoco[fasta_name_query] = fasta_seq_query + return(dicoco) diff -r 000000000000 -r eb95bf7f90ae static/images/adaptsearch_picture_helps.png Binary file static/images/adaptsearch_picture_helps.png has changed diff -r 000000000000 -r eb95bf7f90ae test-data/cds_search.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cds_search.log Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,24 @@ +*************** CDS detection *************** + +Files processed: 4 + Files with CDS: 2 + Files with CDS plus M (codon start): 1 + Files without CDS: 2 + + + +In locus with CDS regardless of the Methionine : + +*************** 1st filter : selection of the locus *************** + +Total number of locus recorded = 2 + Number of locus with 1 species : 1 + Number of locus with 2 species : 0 +Number of locus excluded (exclude if not at least 2 species in the alignment)= 1 + +*************** 2nd Filter : removal of the indel *************** + +Total number of locus recorded = 1 + Total number of locus with no indels (SAVED) = 1 + Total number of locus, when removing indel, wich are empty (EXCLUDED) = 0 + diff -r 000000000000 -r eb95bf7f90ae test-data/cds_search_methionine.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cds_search_methionine.log Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,24 @@ +*************** CDS detection *************** + +Files processed: 1 + Files with CDS: 1 + Files with CDS plus M (codon start): 1 + Files without CDS: 0 + + + +In locus with CDS considering Methionine : + +*************** 1st filter : selection of the locus *************** + +Total number of locus recorded = 1 + Number of locus with 1 species : 0 + Number of locus with 2 species : 0 +Number of locus excluded (exclude if not at least 1 species in the alignment)= 0 + +*************** 2nd Filter : removal of the indel *************** + +Total number of locus recorded = 1 + Total number of locus with no indels (SAVED) = 1 + Total number of locus, when removing indel, wich are empty (EXCLUDED) = 0 + diff -r 000000000000 -r eb95bf7f90ae test-data/inputs/orthogroup_12_with_5_sequences.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/orthogroup_12_with_5_sequences.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Ac689_1/1_1.000_714 +ccgtccaaacgtgacgaatacgcggccgagctggccaaatacatcgacgtcgacgtctacggaaagtgcggcacgctgacgtgtccgaaggatgagaaggtcgactgcgaacagatgtgggccgaaacgtacaagtttcacttgtcctttgagaacacgatttgtcaagattacatcacg +>Ap6163_1/1_1.000_569 +-------------------------------tggccaagtacatcgacgtagacgtctatggcaagtgcggca----------------------------------------------------------------------------------------------------------- +>Pu6544_1/1_1.000_249 +------------------------------------------------------------------------------------------------------------------------------acgtacaagtttca---------------------------------------- diff -r 000000000000 -r eb95bf7f90ae test-data/inputs/orthogroup_14_with_4_sequences.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/orthogroup_14_with_4_sequences.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ap1491_1/1_1.000_963 +cgaagaaacatgacggagcaaatgacgcttcgcggtaccctccaagggcacggaggatgggtaacccaaattgctacaacgccacaatttcctgatatcattttgtcggcttctagagacaaatcgctcattctgtggcagctgactcgcgaggaatcgcgttacggcttccctcgcaaggccttgcgcggacatggacacttcgtgtctgacgtcgtcatgtcatcagatggacagttcgccctgtctggatcctgggatggaacccttcgtttgtgggatcttggcactggtcagacaactcgtaggtttgttggacacacgaaggacgtgctaagtgtggctttctcagctgataaccgtcagattgtgtcaggttcacgtgacaagaccatcaagttgtggaacactcttggggtgtgcaagtataccattcaggaagatgggcacacagagtgggtatcatgtgttcgattctcaccaaacacccagaatcccatcattgtgtcctgtggctgggacaaactggttaaggtgtggaatctgacaaactgcaagctaaaaacaaaccacttcggacactcaggttatctgaactgtgtcactgtgtcccctgatggatctttgtgcgcttctggtggaaaagatggccaggcaatgttatgggatttgaatgaaggcaagcatctgtacacattggatggtggtgatgtcatcaactcactgtgcttcagccccaacagatactggctttgtgctgcttctggaccaagcataaagatctgggatctggaaggcaaggttgttgtggatgagctgcgtccagaagtgatcagcaccagtgccagtgccgagccacctcagtgtatatccctggcttggtcagctgatggccagacactgtttgctggatacacagacaacctgattcgtgtgtggcaggtatctatggcagctacccga +>Ac6688_1/1_1.000_963 +cgaagaaacatgacggagcaaatgacgcttcgcggtaccctccaagggcacggaggatgggtaactcaaattgctacaacgccacaatttcccgatattattttgtcggcttcaagagacaaatcgctcatcctgtggcagctgactcgtgaggaatcgcgctacggtttccctcgcaaggccttgcgtggacatggacatttcgtgtctgacgttgttatgtcatcagatggacagttcgctctgtctggatcctgggatggaacccttcgtttgtgggatcttggcactggtcagacaactcgtaggtttgtcggacacacaaaagatgtgctaagcgtggccttctcagctgataaccgccagattgtgtcaggttcacgtgacaagaccatcaagttgtggaacactctcggtgtatgcaagtacaccattcaggaagatggacacacagagtgggtatcatgtgttcgcttctcaccaaacactcagaatcccatcattgtgtcttgtggctgggacaaactggttaaggtttggaatctgacaaactgcaaactaaaaacaaaccactttggacactcaggttacctgaactgtgtcaccgtgtcccctgatggatctttgtgtgcttctggtggtaaggatggccaggcaatgttgtgggatttgaatgaaggcaagcatctgtacacattggatggtggtgatgtcatcaactcactgtgcttcagccccaacagatattggctttgtgctgcctctggaccaagcataaagatctgggatctggaaggcaaggttgttgtggatgagttgcgtccagaagtgatcagtaccagtgccagcgctgaaccaccccagtgtatatccctggcatggtcagctgatggccagacgctgtttgcaggatacacagacaacctgatccgtgtctggcaggtgtccatggcagctacccga diff -r 000000000000 -r eb95bf7f90ae test-data/inputs/orthogroup_1_with_4_sequences.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/orthogroup_1_with_4_sequences.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Ac3644_1/1_1.000_1626 +attggcacagcatcatttgaaagccttgttgagttagttaaatattacaagaagaacccactttacagaaaaatgaaactcagatatgctgttaatgaggaagttgttcaacaacaaggaatggatccagatgaacaggcaatttacagtggagaaatgtacacaaatccaaatgattttgtatctaagattaaagtgagggctttgtatgactacaagaaacaacgtgaagatgaactg +>Ap2303_1/1_1.000_424 +attggcacagcatcctttgagagccttgttgagttagttaagtattacaagaagaacccactttacagaaaaatgaaactcagatatgcggttaatgaggaagttgtccagcaacaaggaatggatccagatgaacaggcaatatacagtggagaaatgtacacaaatccaaatgattttgtatctaagattaaag-------------------------------------------- +>Am7472_1/1_1.000_254 +attggtaccgcatcatttgagagtctggtagagctagtggaatactacaagaaaaaccc------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff -r 000000000000 -r eb95bf7f90ae test-data/inputs/orthogroup_6_with_4_sequences.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/orthogroup_6_with_4_sequences.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ac1013_1/1_1.000_525 +ttgaccttcaaggagctgaaaaaagccctcaaggccaaaggctacaaggtcaagggcaaacaactcaaggctcaattcaaacagtttgataaagatggcgataagaaaataacccttcaagaatacttgatcgcgatgggccaagtcccggatgcctaccacaaagaggcagccatgcggcaggctttcgagcgggcggacaaaaacaaagacggaagcttggacatcggcgaggttaacgccattttccaagagatgaacaccttccttgatccagacgagctcttcaagatcgtccacgccatcgacaaggaccacagcggacggatcaactacgacgaattcttgaccttcttcatgaagcagcaaaatgtcaactttgagagcagcgacagcgactgggac +>Ap5072_1/1_1.000_437 +ttgaccttcaaggagctgaaaaaggccctgaaggccaagggctacaaggtcaagggcaaacagctcaaggcccaattcaaacagtttgataaagacggcgacaagaaaatatcccttcaagaatacctgatcgcgatgggtcaagtcccggatgtctaccacaaagaggccgccatgcggcaggctttcgagcgggcggacaaaaacaaagacggaagcttggacatcggcgagatcaacgccatcttccgggagatgaacaccttcctcaatccagacgagctctttcagatcgtccacgaaatcgacaaggaccacagcggacggatcaactacaacgaattcctgaccttcttcatgaagcagcaaaatgtcaacttcgagagcagtgacagcgattggga- diff -r 000000000000 -r eb95bf7f90ae test-data/inputs/orthogroup_7_with_3_sequences.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/orthogroup_7_with_3_sequences.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Ac2173_1/1_1.000_330 +gcttggagaaggtcagaagcgttgaaaatgttgcagagagctcttcgtcttaaccagcttactcgtcgattttccacaagtgtggttcggcgaagtgaagaatggcaacaaaggggtcttcccggatctaacatgcctttcgatatgaacaaccgatacaagttgatggcttggttcatcctcttttttggttctggcttgggagtgccatatctcttagtccgccaccagcttctgaaggag +>Ap5050_1/1_1.000_243 +gcttggggaaggtcagaagctgtgaaaatgttgcagagagctcttcgtcttaatcaacttactcgtcgattttccacaagtgtggttcgacggagtgaagaatggcaacagaggggtcttcccggatctaacatgcctttcgacatgaacaaccgatacaaattgatggcgtggttcatcctcttttttggttctggcttgggagtgccatatctcttagtccgccaccagcttctgaaggag +>Am3527_1/1_1.000_270 +-------------------------------------------------------------ctcgtcgtttttccacaagtgtggtcagacaaagccaagaatggcaacagcttggagtacctggatcgaacatgccatttgacatcaacaacagatacaa---------------------------------------------------------------------------------- diff -r 000000000000 -r eb95bf7f90ae test-data/inputs/orthogroup_8_with_4_sequences.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/orthogroup_8_with_4_sequences.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Am6963_1/1_1.000_854 +gataagtcgtcaggagtacattatggcatcataacctgtgagggctgcaagggatttttc +>Pg7693_1/1_1.000_511 +---------------------------------acctgtgagggctg------------- diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +?????????????????????????????????????????????????????????????VFLVVFH*LYQTLK*CGTN +>Ap2303_1/1_1.000_424 +???????????????FNLRYKIIWICVHFSTVYCLFIWIHSLLLDNFLINRISEFHFSVKWVLLVILN*LNKALKGCCAN +>Ac3644_1/1_1.000_1626 +QFIFTLFLVVIQSPHFNLRYKIIWICVHFSTVNCLFIWIHSLLLNNFLINSISEFHFSVKWVLLVIFN*LNKAFK*CCAN diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +????????????????????????????VSVVDVKWHVRSRYSKLLPFLALSDHTCGKTT????????????????????? +>Ap5050_1/1_1.000_243 +LLQKLVAD*EIWHSQARTKKEDEPRHQFVSVVHVERHVRSGKTPLLPFFTPSNHTCGKSTSKLIKTKSSLQHFHSF*PSPS +>Ac2173_1/1_1.000_330 +LLQKLVAD*EIWHSQARTKKEDEPSHQLVSVVHIERHVRSGKTPLLPFFTSPNHTCGKSTSKLVKTKSSLQHFQRF*PSPS diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_14_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_14_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ac6688_1/1_1.000_963 +RVAAMDTCQTRIRLSVYPANSVWPSADHARDIHWGGSALALVLITSGRNSSTTTLPSRSQIFMLGPEAAQSQYLLGLKHSELMTSPPSNVYRCLPSFKSHNIAWPSLPPEAHKDPSGDTVTQFR*PECPKWFVFSLQFVRFQTLTSLSQPQDTMMGF*VFGEKRTHDTHSVCPSS*MVYLHTPRVFHNLMVLSREPDTIWRLSAEKATLSTSFVCPTNLRVV*PVPRSHKRRVPSQDPDRANCPSDDITTSDTKCPCPRKALRGKP*RDSSRVSCHRMSDLSLEADKIISGNCGVVAI*VTHPPCPWRVPRSVICSVMFL +>Ap1491_1/1_1.000_963 +RVAAIDTCHTRIRLSVYPANSVWPSADQARDIH*GGSALALVLITSGRSSSTTTLPSRSQIFMLGPEAAQSQYLLGLKHSELMTSPPSNVYRCLPSFKSHNIAWPSFPPEAHKDPSGDTVTQFR*PECPKWFVFSLQFVRFHTLTSLSQPQDTMMGFWVFGENRTHDTHSVCPSS*MVYLHTPRVFHNLMVLSREPDTI*RLSAEKATLSTSFVCPTNLRVV*PVPRSHKRRVPSQDPDRANCPSDDMTTSDTKCPCPRKALRGKP*RDSSRVSCHRMSDLSLEADKMISGNCGVVAIWVTHPPCPWRVPRSVICSVMFL diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +?????????????????????????????????????????????????????????????VFLVVFH*LYQTLK*CGTN +>Ap2303_1/1_1.000_424 +???????????????FNLRYKIIWICVHFSTVYCLFIWIHSLLLDNFLINRISEFHFSVKWVLLVILN*LNKALKGCCAN +>Ac3644_1/1_1.000_1626 +QFIFTLFLVVIQSPHFNLRYKIIWICVHFSTVNCLFIWIHSLLLNNFLINSISEFHFSVKWVLLVIFN*LNKAFK*CCAN diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_6_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_6_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ap5072_1/1_1.000_437 +PNRCHCSRS*HFAAS*RRSGIRCS*SVRCGPCRFRGRSERARLD*GRCSSPGRWR*SRRCPSFRLCFCPPARKPAAWRPLCGRHPGLDPSRSGILEGIFSCRRLYQTV*IGP*AVCP*PCSPWPSGPFSAP*RS +>Ac1013_1/1_1.000_525 +PSRCRCSQS*HFAAS*RRSRIRRS*SVRCGPCRWRGRS*RARLDQGRCSSLGKWR*PRRCPSFRLCFCPPARKPAAWLPLCGRHPGLGPSRSSILEGLFSYRHLYQTV*IEP*VVCP*PCSLWP*GLFSAP*RS diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +????????????????????????????VSVVDVKWHVRSRYSKLLPFLALSDHTCGKTT????????????????????? +>Ap5050_1/1_1.000_243 +LLQKLVAD*EIWHSQARTKKEDEPRHQFVSVVHVERHVRSGKTPLLPFFTPSNHTCGKSTSKLIKTKSSLQHFHSF*PSPS +>Ac2173_1/1_1.000_330 +LLQKLVAD*EIWHSQARTKKEDEPSHQLVSVVHIERHVRSGKTPLLPFFTSPNHTCGKSTSKLVKTKSSLQHFQRF*PSPS diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------gggtttttcttgtagtattccactagctctaccagactctcaaatgatgcggtaccaat +>Ap2303_1/1_1.000_424 +--------------------------------------------ctttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtatattgcctgttcatctggatccattccttgttgctggacaacttcctcattaaccgcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatacttaactaactcaacaaggctctcaaaggatgctgtgccaat +>Ac3644_1/1_1.000_1626 +cagttcatcttcacgttgtttcttgtagtcatacaaagccctcactttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtaaattgcctgttcatctggatccattccttgttgttgaacaacttcctcattaacagcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatatttaactaactcaacaaggctttcaaatgatgctgtgccaat diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +----------------------------------------------------------------------------------ttgtatctgttgttgatgtcaaatggcatgttcgatccaggtactccaagctgttgccattcttggctttgtctgaccacacttgtggaaaaacgacgag------------------------------------------------------------- +>Ap5050_1/1_1.000_243 +ctccttcagaagctggtggcggactaagagatatggcactcccaagccagaaccaaaaaagaggatgaaccacgccatcaatttgtatcggttgttcatgtcgaaaggcatgttagatccgggaagacccctctgttgccattcttcactccgtcgaaccacacttgtggaaaatcgacgagtaagttgattaagacgaagagctctctgcaacattttcacagcttctgaccttccccaagc +>Ac2173_1/1_1.000_330 +ctccttcagaagctggtggcggactaagagatatggcactcccaagccagaaccaaaaaagaggatgaaccaagccatcaacttgtatcggttgttcatatcgaaaggcatgttagatccgggaagacccctttgttgccattcttcacttcgccgaaccacacttgtggaaaatcgacgagtaagctggttaagacgaagagctctctgcaacattttcaacgcttctgaccttctccaagc diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_14_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_14_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ac6688_1/1_1.000_963 +cgggtagctgccatggacacctgccagacacggatcaggttgtctgtgtatcctgcaaacagcgtctggccatcagctgaccatgccagggatatacactggggtggttcagcgctggcactggtactgatcacttctggacgcaactcatccacaacaaccttgccttccagatcccagatctttatgcttggtccagaggcagcacaaagccaatatctgttggggctgaagcacagtgagttgatgacatcaccaccatccaatgtgtacagatgcttgccttcattcaaatcccacaacattgcctggccatccttaccaccagaagcacacaaagatccatcaggggacacggtgacacagttcaggtaacctgagtgtccaaagtggtttgtttttagtttgcagtttgtcagattccaaaccttaaccagtttgtcccagccacaagacacaatgatgggattctgagtgtttggtgagaagcgaacacatgatacccactctgtgtgtccatcttcctgaatggtgtacttgcatacaccgagagtgttccacaacttgatggtcttgtcacgtgaacctgacacaatctggcggttatcagctgagaaggccacgcttagcacatcttttgtgtgtccgacaaacctacgagttgtctgaccagtgccaagatcccacaaacgaagggttccatcccaggatccagacagagcgaactgtccatctgatgacataacaacgtcagacacgaaatgtccatgtccacgcaaggccttgcgagggaaaccgtagcgcgattcctcacgagtcagctgccacaggatgagcgatttgtctcttgaagccgacaaaataatatcgggaaattgtggcgttgtagcaatttgagttacccatcctccgtgcccttggagggtaccgcgaagcgtcatttgctccgtcatgtttctt +>Ap1491_1/1_1.000_963 +cgggtagctgccatagatacctgccacacacgaatcaggttgtctgtgtatccagcaaacagtgtctggccatcagctgaccaagccagggatatacactgaggtggctcggcactggcactggtgctgatcacttctggacgcagctcatccacaacaaccttgccttccagatcccagatctttatgcttggtccagaagcagcacaaagccagtatctgttggggctgaagcacagtgagttgatgacatcaccaccatccaatgtgtacagatgcttgccttcattcaaatcccataacattgcctggccatcttttccaccagaagcgcacaaagatccatcaggggacacagtgacacagttcagataacctgagtgtccgaagtggtttgtttttagcttgcagtttgtcagattccacaccttaaccagtttgtcccagccacaggacacaatgatgggattctgggtgtttggtgagaatcgaacacatgatacccactctgtgtgcccatcttcctgaatggtatacttgcacaccccaagagtgttccacaacttgatggtcttgtcacgtgaacctgacacaatctgacggttatcagctgagaaagccacacttagcacgtccttcgtgtgtccaacaaacctacgagttgtctgaccagtgccaagatcccacaaacgaagggttccatcccaggatccagacagggcgaactgtccatctgatgacatgacgacgtcagacacgaagtgtccatgtccgcgcaaggccttgcgagggaagccgtaacgcgattcctcgcgagtcagctgccacagaatgagcgatttgtctctagaagccgacaaaatgatatcaggaaattgtggcgttgtagcaatttgggttacccatcctccgtgcccttggagggtaccgcgaagcgtcatttgctccgtcatgtttctt diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------gggtttttcttgtagtattccactagctctaccagactctcaaatgatgcggtaccaat +>Ap2303_1/1_1.000_424 +--------------------------------------------ctttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtatattgcctgttcatctggatccattccttgttgctggacaacttcctcattaaccgcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatacttaactaactcaacaaggctctcaaaggatgctgtgccaat +>Ac3644_1/1_1.000_1626 +cagttcatcttcacgttgtttcttgtagtcatacaaagccctcactttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtaaattgcctgttcatctggatccattccttgttgttgaacaacttcctcattaacagcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatatttaactaactcaacaaggctttcaaatgatgctgtgccaat diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_6_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_6_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ap5072_1/1_1.000_437 +cccaatcgctgtcactgctctcgaagttgacattttgctgcttcatgaagaaggtcaggaattcgttgtagttgatccgtccgctgtggtccttgtcgatttcgtggacgatctgaaagagctcgtctggattgaggaaggtgttcatctcccggaagatggcgttgatctcgccgatgtccaagcttccgtctttgtttttgtccgcccgctcgaaagcctgccgcatggcggcctctttgtggtagacatccgggacttgacccatcgcgatcaggtattcttgaagggatattttcttgtcgccgtctttatcaaactgtttgaattgggccttgagctgtttgcccttgaccttgtagcccttggccttcagggcctttttcagctccttgaaggtca +>Ac1013_1/1_1.000_525 +cccagtcgctgtcgctgctctcaaagttgacattttgctgcttcatgaagaaggtcaagaattcgtcgtagttgatccgtccgctgtggtccttgtcgatggcgtggacgatcttgaagagctcgtctggatcaaggaaggtgttcatctcttggaaaatggcgttaacctcgccgatgtccaagcttccgtctttgtttttgtccgcccgctcgaaagcctgccgcatggctgcctctttgtggtaggcatccgggacttggcccatcgcgatcaagtattcttgaagggttattttcttatcgccatctttatcaaactgtttgaattgagccttgagttgtttgcccttgaccttgtagcctttggccttgagggcttttttcagctccttgaaggtca diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +----------------------------------------------------------------------------------ttgtatctgttgttgatgtcaaatggcatgttcgatccaggtactccaagctgttgccattcttggctttgtctgaccacacttgtggaaaaacgacgag------------------------------------------------------------- +>Ap5050_1/1_1.000_243 +ctccttcagaagctggtggcggactaagagatatggcactcccaagccagaaccaaaaaagaggatgaaccacgccatcaatttgtatcggttgttcatgtcgaaaggcatgttagatccgggaagacccctctgttgccattcttcactccgtcgaaccacacttgtggaaaatcgacgagtaagttgattaagacgaagagctctctgcaacattttcacagcttctgaccttccccaagc +>Ac2173_1/1_1.000_330 +ctccttcagaagctggtggcggactaagagatatggcactcccaagccagaaccaaaaaagaggatgaaccaagccatcaacttgtatcggttgttcatatcgaaaggcatgttagatccgggaagacccctttgttgccattcttcacttcgccgaaccacacttgtggaaaatcgacgagtaagctggttaagacgaagagctctctgcaacattttcaacgcttctgaccttctccaagc diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_aa/test1/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_aa/test1/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +?????????????????????????????????????????????????????????????VFLVVFH +>Ap2303_1/1_1.000_424 +???????????????FNLRYKIIWICVHFSTVYCLFIWIHSLLLDNFLINRISEFHFSVKWVLLVILN +>Ac3644_1/1_1.000_1626 +QFIFTLFLVVIQSPHFNLRYKIIWICVHFSTVNCLFIWIHSLLLNNFLINSISEFHFSVKWVLLVIFN diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_aa/test1/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_aa/test1/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +???????????????????VSVVDVKWHVRSRYSKLLPFLALSDHTCGKTT???????????????? +>Ap5050_1/1_1.000_243 +EIWHSQARTKKEDEPRHQFVSVVHVERHVRSGKTPLLPFFTPSNHTCGKSTSKLIKTKSSLQHFHSF +>Ac2173_1/1_1.000_330 +EIWHSQARTKKEDEPSHQLVSVVHIERHVRSGKTPLLPFFTSPNHTCGKSTSKLVKTKSSLQHFQRF diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_14_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_14_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ac6688_1/1_1.000_963 +GGSALALVLITSGRNSSTTTLPSRSQIFMLGPEAAQSQYLLGLKHSELMTSPPSNVYRCLPSFKSHNIAWPSLPPEAHKDPSGDTVTQFR +>Ap1491_1/1_1.000_963 +GGSALALVLITSGRSSSTTTLPSRSQIFMLGPEAAQSQYLLGLKHSELMTSPPSNVYRCLPSFKSHNIAWPSFPPEAHKDPSGDTVTQFR diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +?????????????????????????????????????????????????????????????VFLVVFH +>Ap2303_1/1_1.000_424 +???????????????FNLRYKIIWICVHFSTVYCLFIWIHSLLLDNFLINRISEFHFSVKWVLLVILN +>Ac3644_1/1_1.000_1626 +QFIFTLFLVVIQSPHFNLRYKIIWICVHFSTVNCLFIWIHSLLLNNFLINSISEFHFSVKWVLLVIFN diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_6_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_6_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ap5072_1/1_1.000_437 +SRRCPSFRLCFCPPARKPAAWRPLCGRHPGLDPSRSGILEGIFSCRRLYQTV +>Ac1013_1/1_1.000_525 +PRRCPSFRLCFCPPARKPAAWLPLCGRHPGLGPSRSSILEGLFSYRHLYQTV diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_aa/test2/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +???????????????????VSVVDVKWHVRSRYSKLLPFLALSDHTCGKTT???????????????? +>Ap5050_1/1_1.000_243 +EIWHSQARTKKEDEPRHQFVSVVHVERHVRSGKTPLLPFFTPSNHTCGKSTSKLIKTKSSLQHFHSF +>Ac2173_1/1_1.000_330 +EIWHSQARTKKEDEPSHQLVSVVHIERHVRSGKTPLLPFFTSPNHTCGKSTSKLVKTKSSLQHFQRF diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------gggtttttcttgtagtattccac +>Ap2303_1/1_1.000_424 +--------------------------------------------ctttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtatattgcctgttcatctggatccattccttgttgctggacaacttcctcattaaccgcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatacttaac +>Ac3644_1/1_1.000_1626 +cagttcatcttcacgttgtttcttgtagtcatacaaagccctcactttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtaaattgcctgttcatctggatccattccttgttgttgaacaacttcctcattaacagcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatatttaac diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +-------------------------------------------------------ttgtatctgttgttgatgtcaaatggcatgttcgatccaggtactccaagctgttgccattcttggctttgtctgaccacacttgtggaaaaacgacgag---------------------------------------------- +>Ap5050_1/1_1.000_243 +gagatatggcactcccaagccagaaccaaaaaagaggatgaaccacgccatcaatttgtatcggttgttcatgtcgaaaggcatgttagatccgggaagacccctctgttgccattcttcactccgtcgaaccacacttgtggaaaatcgacgagtaagttgattaagacgaagagctctctgcaacattttcacagcttc +>Ac2173_1/1_1.000_330 +gagatatggcactcccaagccagaaccaaaaaagaggatgaaccaagccatcaacttgtatcggttgttcatatcgaaaggcatgttagatccgggaagacccctttgttgccattcttcacttcgccgaaccacacttgtggaaaatcgacgagtaagctggttaagacgaagagctctctgcaacattttcaacgcttc diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_14_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_14_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ac6688_1/1_1.000_963 +ggtggttcagcgctggcactggtactgatcacttctggacgcaactcatccacaacaaccttgccttccagatcccagatctttatgcttggtccagaggcagcacaaagccaatatctgttggggctgaagcacagtgagttgatgacatcaccaccatccaatgtgtacagatgcttgccttcattcaaatcccacaacattgcctggccatccttaccaccagaagcacacaaagatccatcaggggacacggtgacacagttcagg +>Ap1491_1/1_1.000_963 +ggtggctcggcactggcactggtgctgatcacttctggacgcagctcatccacaacaaccttgccttccagatcccagatctttatgcttggtccagaagcagcacaaagccagtatctgttggggctgaagcacagtgagttgatgacatcaccaccatccaatgtgtacagatgcttgccttcattcaaatcccataacattgcctggccatcttttccaccagaagcgcacaaagatccatcaggggacacagtgacacagttcaga diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_1_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_1_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am7472_1/1_1.000_254 +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------gggtttttcttgtagtattccac +>Ap2303_1/1_1.000_424 +--------------------------------------------ctttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtatattgcctgttcatctggatccattccttgttgctggacaacttcctcattaaccgcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatacttaac +>Ac3644_1/1_1.000_1626 +cagttcatcttcacgttgtttcttgtagtcatacaaagccctcactttaatcttagatacaaaatcatttggatttgtgtacatttctccactgtaaattgcctgttcatctggatccattccttgttgttgaacaacttcctcattaacagcatatctgagtttcatttttctgtaaagtgggttcttcttgtaatatttaac diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_6_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_6_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ap5072_1/1_1.000_437 +tctcgccgatgtccaagcttccgtctttgtttttgtccgcccgctcgaaagcctgccgcatggcggcctctttgtggtagacatccgggacttgacccatcgcgatcaggtattcttgaagggatattttcttgtcgccgtctttatcaaactgtt +>Ac1013_1/1_1.000_525 +cctcgccgatgtccaagcttccgtctttgtttttgtccgcccgctcgaaagcctgccgcatggctgcctctttgtggtaggcatccgggacttggcccatcgcgatcaagtattcttgaagggttattttcttatcgccatctttatcaaactgtt diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_05_CDS_nuc/test2/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +-------------------------------------------------------ttgtatctgttgttgatgtcaaatggcatgttcgatccaggtactccaagctgttgccattcttggctttgtctgaccacacttgtggaaaaacgacgag---------------------------------------------- +>Ap5050_1/1_1.000_243 +gagatatggcactcccaagccagaaccaaaaaagaggatgaaccacgccatcaatttgtatcggttgttcatgtcgaaaggcatgttagatccgggaagacccctctgttgccattcttcactccgtcgaaccacacttgtggaaaatcgacgagtaagttgattaagacgaagagctctctgcaacattttcacagcttc +>Ac2173_1/1_1.000_330 +gagatatggcactcccaagccagaaccaaaaaagaggatgaaccaagccatcaacttgtatcggttgttcatatcgaaaggcatgttagatccgggaagacccctttgttgccattcttcacttcgccgaaccacacttgtggaaaatcgacgagtaagctggttaagacgaagagctctctgcaacattttcaacgcttc diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_08_CDS_without_indel_aa/test1/old_orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_08_CDS_without_indel_aa/test1/old_orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +-------------------VSVVDVKWHVRSRYSKLLPFLALSDHTCGKTT---------------- +>Ap5050_1/1_1.000_243 +EIWHSQARTKKEDEPRHQFVSVVHVERHVRSGKTPLLPFFTPSNHTCGKSTSKLIKTKSSLQHFHSF +>Ac2173_1/1_1.000_330 +EIWHSQARTKKEDEPSHQLVSVVHIERHVRSGKTPLLPFFTSPNHTCGKSTSKLVKTKSSLQHFQRF diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_08_CDS_without_indel_aa/test1/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_08_CDS_without_indel_aa/test1/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +VSVVDVKWHVRSRYSKLLPFLALSDHTCGKTT +>Ap5050_1/1_1.000_243 +VSVVHVERHVRSGKTPLLPFFTPSNHTCGKST +>Ac2173_1/1_1.000_330 +VSVVHIERHVRSGKTPLLPFFTSPNHTCGKST diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_08_CDS_without_indel_aa/test2/orthogroup_14_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_08_CDS_without_indel_aa/test2/orthogroup_14_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ac6688_1/1_1.000_963 +GGSALALVLITSGRNSSTTTLPSRSQIFMLGPEAAQSQYLLGLKHSELMTSPPSNVYRCLPSFKSHNIAWPSLPPEAHKDPSGDTVTQFR +>Ap1491_1/1_1.000_963 +GGSALALVLITSGRSSSTTTLPSRSQIFMLGPEAAQSQYLLGLKHSELMTSPPSNVYRCLPSFKSHNIAWPSFPPEAHKDPSGDTVTQFR diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_08_CDS_without_indel_nuc/test1/old_orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_08_CDS_without_indel_nuc/test1/old_orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +---------------------------------------------------------gtatctgttgttgatgtcaaatggcatgttcgatccaggtactccaagctgttgccattcttggctttgtctgaccacacttgtggaaaaacgacg------------------------------------------------ +>Ap5050_1/1_1.000_243 +gagatatggcactcccaagccagaaccaaaaaagaggatgaaccacgccatcaatttgtatcggttgttcatgtcgaaaggcatgttagatccgggaagacccctctgttgccattcttcactccgtcgaaccacacttgtggaaaatcgacgagtaagttgattaagacgaagagctctctgcaacattttcacagcttc +>Ac2173_1/1_1.000_330 +gagatatggcactcccaagccagaaccaaaaaagaggatgaaccaagccatcaacttgtatcggttgttcatatcgaaaggcatgttagatccgggaagacccctttgttgccattcttcacttcgccgaaccacacttgtggaaaatcgacgagtaagctggttaagacgaagagctctctgcaacattttcaacgcttc diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_08_CDS_without_indel_nuc/test1/orthogroup_7_with_3_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_08_CDS_without_indel_nuc/test1/orthogroup_7_with_3_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,6 @@ +>Am3527_1/1_1.000_270 +gtatctgttgttgatgtcaaatggcatgttcgatccaggtactccaagctgttgccattcttggctttgtctgaccacacttgtggaaaaacgacg +>Ap5050_1/1_1.000_243 +gtatcggttgttcatgtcgaaaggcatgttagatccgggaagacccctctgttgccattcttcactccgtcgaaccacacttgtggaaaatcgacg +>Ac2173_1/1_1.000_330 +gtatcggttgttcatatcgaaaggcatgttagatccgggaagacccctttgttgccattcttcacttcgccgaaccacacttgtggaaaatcgacg diff -r 000000000000 -r eb95bf7f90ae test-data/outputs_ORF_Search_08_CDS_without_indel_nuc/test2/orthogroup_14_with_2_species.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs_ORF_Search_08_CDS_without_indel_nuc/test2/orthogroup_14_with_2_species.fasta Fri Feb 01 10:26:37 2019 -0500 @@ -0,0 +1,4 @@ +>Ac6688_1/1_1.000_963 +ggtggttcagcgctggcactggtactgatcacttctggacgcaactcatccacaacaaccttgccttccagatcccagatctttatgcttggtccagaggcagcacaaagccaatatctgttggggctgaagcacagtgagttgatgacatcaccaccatccaatgtgtacagatgcttgccttcattcaaatcccacaacattgcctggccatccttaccaccagaagcacacaaagatccatcaggggacacggtgacacagttcagg +>Ap1491_1/1_1.000_963 +ggtggctcggcactggcactggtgctgatcacttctggacgcagctcatccacaacaaccttgccttccagatcccagatctttatgcttggtccagaagcagcacaaagccagtatctgttggggctgaagcacagtgagttgatgacatcaccaccatccaatgtgtacagatgcttgccttcattcaaatcccataacattgcctggccatcttttccaccagaagcgcacaaagatccatcaggggacacagtgacacagttcaga