# HG changeset patch # User mmonot # Date 1496997881 14400 # Node ID 405091e3182ca5571e839ea256044e5db49f33aa # Parent 3d7c80089e27eb20b93f21a261166ff9c79cc499 Uploaded diff -r 3d7c80089e27 -r 405091e3182c phageterm/._PhageTerm.py Binary file phageterm/._PhageTerm.py has changed diff -r 3d7c80089e27 -r 405091e3182c phageterm/._PhageTerm.xml Binary file phageterm/._PhageTerm.xml has changed diff -r 3d7c80089e27 -r 405091e3182c phageterm/._README.txt Binary file phageterm/._README.txt has changed diff -r 3d7c80089e27 -r 405091e3182c phageterm/PhageTerm.py --- a/phageterm/PhageTerm.py Wed Jun 07 10:39:50 2017 -0400 +++ b/phageterm/PhageTerm.py Fri Jun 09 04:44:41 2017 -0400 @@ -46,7 +46,7 @@ usage = """\n\nUsage: %prog -f reads.fastq -r phage_sequence.fasta [-n phage_name -p reads_paired -s seed_lenght -d surrounding -t installation_test -c nbr_core -g host.fasta (warning increase process time)] Program: PhageTerm - Analyze phage termini and packaging mode using reads from high-throughput sequenced phage data - Version: 1.0.7 + Version: 1.0.8 Contact: Julian Garneau Contact: David Bikard Contact: Marc Monot diff -r 3d7c80089e27 -r 405091e3182c phageterm/_modules/._functions_PhageTerm.py Binary file phageterm/_modules/._functions_PhageTerm.py has changed diff -r 3d7c80089e27 -r 405091e3182c phageterm/_modules/functions_PhageTerm.py --- a/phageterm/_modules/functions_PhageTerm.py Wed Jun 07 10:39:50 2017 -0400 +++ b/phageterm/_modules/functions_PhageTerm.py Fri Jun 09 04:44:41 2017 -0400 @@ -1242,8 +1242,8 @@ else: export["Coverage +"] = whole_coverage[0] export["SPC +"] = termini_coverage[0] - export["X +"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC_std"])] - export["X + (close)"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC"])] + export["T +"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC_std"])] + export["T + (close)"] = [format(x/100.0,'0.2') for x 
in list(phage_plus_norm.sort_values("Position")["SPC"])] export["pvalue +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma"])] export["padj +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma_adj"])] if paired != "": @@ -1251,8 +1251,8 @@ else: export["Coverage -"] = paired_whole_coverage[1] export["SPC -"] = termini_coverage[1] - export["X -"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC_std"])] - export["X - (close)"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC"])] + export["T -"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC_std"])] + export["T - (close)"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC"])] export["pvalue -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma"])] export["padj -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma_adj"])] filout = open(phagename + "_statistics.csv", "w") @@ -1474,7 +1474,7 @@ IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional') IMAGE.hAlign = 'CENTER' - data = [["Strand", "Location", "X", "pvalue", "X (Start. Pos. Cov. 
/ Whole Cov.)"], ["+",phage_plus_norm["Position"][0],format(phage_plus_norm["SPC"][0]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][0], '0.2e'),IMAGE], ["",phage_plus_norm["Position"][1],format(phage_plus_norm["SPC"][1]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_plus_norm["Position"][2],format(phage_plus_norm["SPC"][2]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_plus_norm["Position"][3],format(phage_plus_norm["SPC"][3]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_plus_norm["Position"][4],format(phage_plus_norm["SPC"][4]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][4], '0.2e'),""], ["-",phage_minus_norm["Position"][0],format(phage_minus_norm["SPC"][0]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][0], '0.2e'),""], ["",phage_minus_norm["Position"][1],format(phage_minus_norm["SPC"][1]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_minus_norm["Position"][2],format(phage_minus_norm["SPC"][2]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_minus_norm["Position"][3],format(phage_minus_norm["SPC"][3]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_minus_norm["Position"][4],format(phage_minus_norm["SPC"][4]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][4], '0.2e'),""]] + data = [["Strand", "Location", "T", "pvalue", "T (Start. Pos. Cov. 
/ Whole Cov.)"], ["+",phage_plus_norm["Position"][0],format(phage_plus_norm["SPC"][0]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][0], '0.2e'),IMAGE], ["",phage_plus_norm["Position"][1],format(phage_plus_norm["SPC"][1]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_plus_norm["Position"][2],format(phage_plus_norm["SPC"][2]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_plus_norm["Position"][3],format(phage_plus_norm["SPC"][3]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_plus_norm["Position"][4],format(phage_plus_norm["SPC"][4]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][4], '0.2e'),""], ["-",phage_minus_norm["Position"][0],format(phage_minus_norm["SPC"][0]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][0], '0.2e'),""], ["",phage_minus_norm["Position"][1],format(phage_minus_norm["SPC"][1]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_minus_norm["Position"][2],format(phage_minus_norm["SPC"][2]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_minus_norm["Position"][3],format(phage_minus_norm["SPC"][3]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_minus_norm["Position"][4],format(phage_minus_norm["SPC"][4]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][4], '0.2e'),""]] t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')]) report.append(t) @@ -1717,7 +1717,7 @@ report.append(Paragraph(ptext, styles["Left"])) report.append(Spacer(1, 
10)) - ptext = 'Reads are mapped on the reference to determine the starting position coverage (SPC) as well as the coverage (COV) in each orientation. These values are then used to compute the variable X = SPC / COV. The average value of X at positions along the genome that are not termini is expected to be 1/F, where F is the average fragment size. For the termini that depends of the packaging mode. Cos Phages: no reads should start before the terminus and therefore X=1. DTR phages: for N phages present in the sample, there should be N fragments that start at the terminus and N fragments that cover the edge of the repeat on the other side of the genome as a results X is expected to be 0.5. Pac phages: for N phages in the sample, there should be N/C fragments starting at the pac site, where C is the number of phage genome copies per concatemer. In the same sample N fragments should cover the pac site position, X is expected to be (N/C)/(N+N/C) = 1/(1+C). To assess whether the number of reads starting at a given position along the genome can be considered a significant outlier, PhageTerm first segments the genome according to coverage using a regression tree. A gamma distribution is then fitted to SPC for each segment and an adjusted p-value is computed for each position. Finally if several significant peaks are detected within a small sequence window (default: 20bp), their X values are merged.' + ptext = 'Reads are mapped on the reference to determine the starting position coverage (SPC) as well as the coverage (COV) in each orientation. These values are then used to compute the variable T = SPC / COV. The average value of T at positions along the genome that are not termini is expected to be 1/F, where F is the average fragment size. For the termini that depends of the packaging mode. Cos Phages: no reads should start before the terminus and therefore T=1. 
DTR phages: for N phages present in the sample, there should be N fragments that start at the terminus and N fragments that cover the edge of the repeat on the other side of the genome as a results T is expected to be 0.5. Pac phages: for N phages in the sample, there should be N/C fragments starting at the pac site, where C is the number of phage genome copies per concatemer. In the same sample N fragments should cover the pac site position, T is expected to be (N/C)/(N+N/C) = 1/(1+C). To assess whether the number of reads starting at a given position along the genome can be considered a significant outlier, PhageTerm first segments the genome according to coverage using a regression tree. A gamma distribution is then fitted to SPC for each segment and an adjusted p-value is computed for each position. Finally if several significant peaks are detected within a small sequence window (default: 20bp), their T values are merged.' report.append(Paragraph(ptext, styles["Justify"])) report.append(Spacer(1, 5))