Mercurial > repos > p.lucas > get_major_minor
changeset 2:908024e80b3d draft
Uploaded
author | p.lucas |
---|---|
date | Mon, 24 Jun 2024 14:31:34 +0000 |
parents | 54d44429c585 |
children | 67061ff36f1c |
files | VCF_create_major_and_minor_reference.py |
diffstat | 1 files changed, 21 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/VCF_create_major_and_minor_reference.py Tue Jun 11 10:05:47 2024 +0000
+++ b/VCF_create_major_and_minor_reference.py Mon Jun 24 14:31:34 2024 +0000
@@ -26,7 +26,7 @@
     for line in open(vcf_file):
         li = line.strip()
         if not li.startswith("#"):
-            # print(li)
+            print(li)
             name_seq = li.split("\t")[0]
             if name_seq == name_seq_ref:
                 pos = str(li.split("\t")[1])
@@ -45,22 +45,25 @@
                 if pos in final_dict.keys():
                     old_majo = final_dict[pos][0]
                     old_mino = final_dict[pos][1]
-                    freq_majo = final_dict[pos][2]
-                    freq_mino = final_dict[pos][3]
-                    # Cas où la nouvelle fréquence alt est > à la freq majo déjà enregistrée
-                    if freq_alt >= freq_majo:
-                        iupac_to_test = "".join(sorted(old_majo + old_mino))
-                        if iupac_to_test not in iupac.keys():
-                            convert_iupac = list(iupac.keys())[list(iupac.values()).index(old_mino)]
-                            iupac_to_test = "".join(sorted(old_majo + convert_iupac))
-                        final_dict[pos] = [alt, iupac[iupac_to_test], freq_alt, freq_majo + freq_mino, name_seq]
+                    old_freq_majo = final_dict[pos][2]
+                    old_freq_mino = final_dict[pos][3]
+                    if freq_alt > freq_ref:
+                        majo = alt
+                        freq_majo = freq_alt
+                        mino = ref
+                        freq_mino = freq_ref
                     else:
-                        iupac_to_test = "".join(sorted(alt + old_mino))
-                        if iupac_to_test not in iupac.keys():
-                            convert_iupac = list(iupac.keys())[list(iupac.values()).index(old_mino)]
-                            iupac_to_test = "".join(sorted(alt + convert_iupac))
-                        freq_add = freq_alt + freq_mino
-                        final_dict[pos] = [old_majo, iupac[iupac_to_test], freq_majo, freq_add, name_seq]
+                        majo = ref
+                        freq_majo = freq_ref
+                        mino = alt
+                        freq_mino = freq_alt
+                    if freq_majo > old_freq_majo:
+                        if freq_mino < old_freq_majo:
+                            final_dict[pos] = [majo, old_majo, freq_majo, old_freq_majo, name_seq]
+                        else:
+                            final_dict[pos] = [majo, mino, freq_majo, freq_mino, name_seq]
+                    elif freq_majo > old_freq_mino:
+                        final_dict[pos] = [old_majo, majo, old_freq_majo, freq_majo, name_seq]
                 # Nouvelle position
                 else:
                     # definition colonne:
@@ -116,14 +119,8 @@
         # print(mut_dict)
         for mut in mut_dict:
             # print(mut_dict[mut])
-            if bool(re.search('[A-Z]', mut_dict[mut][0])) is False:
-                major_seq[int(mut)-1] = 'N'
-            else:
-                major_seq[int(mut)-1] = mut_dict[mut][0]
-            if bool(re.search('[A-Z]', mut_dict[mut][1])) is False:
-                minor_seq[int(mut)-1] = 'N'
-            else:
-                minor_seq[int(mut)-1] = mut_dict[mut][1]
+            major_seq[int(mut)-1] = mut_dict[mut][0]
+            minor_seq[int(mut)-1] = mut_dict[mut][1]
         major_ref.append(SeqRecord(major_seq, id=f"{ide}_MAJOR", description=""))
         minor_ref.append(SeqRecord(minor_seq, id=f"{ide}_MINOR", description=""))