Mercurial > repos > mmonot > phageterm
changeset 7:d48bdee70f20 draft
Uploaded
author | mmonot |
---|---|
date | Fri, 09 Jun 2017 06:00:29 -0400 |
parents | 8c56dd9c4b6e |
children | 2b1a4c2c3b83 |
files | phageterm/._.DS_Store phageterm/._PhageTerm.py phageterm/._PhageTerm.xml phageterm/._READ_ME.txt phageterm/README.txt phageterm/READ_ME.txt phageterm/_modules/._functions_PhageTerm.py phageterm/_modules/functions_PhageTerm.py |
diffstat | 8 files changed, 56 insertions(+), 135 deletions(-) [+] |
line wrap: on
line diff
--- a/phageterm/README.txt Fri Jun 09 04:45:21 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -PROGRAM -======= - -PhageTerm.py - run as command line in a shell - - -VERSION -======= - -Version 1.0.8 - - -INTRODUCTION -============ - -PhageTerm software is a tool to determine phage termini and packaging mode -from high throughput sequences that rely on the random fragmentation of DNA (e.g. -Illumina TruSeq). Phage sequencing reads from a fastq file are aligned to the phage -reference genome in order to calculate two types of coverage values (whole genome coverage -and the starting position coverage). The starting position coverage is used to perform a -detailed termini analysis. If the user provides the host sequence, reads that does not -match the phage genome are tested on the host using the same mapping function. - -The PhageTerm program and information is available at https://sourceforge.net/projects/phageterm/ - -A Galaxy wrapper version is also available at https://galaxy.pasteur.fr - - -PREREQUISITES -============= - -Unix/Linux - -- Python 2.7 -- matplotlib 2.0.2 -- numpy 1.11 -- pandas 0.19.1 -- sklearn 0.18.1 -- scipy 0.19.0 -- statsmodels 0.0.0 -- reportlab 3.4.0 - - -COMMAND LINE -============ - - - ./PhageTerm.py -f reads.fastq -r phage_sequence.fasta [-n phage_name -p reads_paired - -s seed_lenght -d surrounding -t installation_test -c nbr_core -g host.fasta - (warning increase process time)] - - - Help: - - ./PhageTerm.py -h - ./PhageTerm.py --help - - Options: - - Raw reads file in fastq format: - -f INPUT_FILE, --fastq=INPUT_FILE - Fastq reads - (NGS sequences from random fragmentation DNA only, - e.g. Illumina TruSeq) - - Raw reads file in fastq format: - -p INPUT_FILE, --paired=INPUT_FILE - Paired fastq reads - (NGS sequences from random fragmentation DNA only, - e.g. 
Illumina TruSeq) - - Phage genome in fasta format: - -r INPUT_FILE, --ref=INPUT_FILE - Reference phage genome as unique contig in fasta format - - Name of the phage being analyzed by the user: - -n PHAGE_NAME, --phagename=PHAGE_NAME - Manually enter the name of the phage being analyzed. - Used as prefix for output files. - - Lenght of the seed used for reads in the mapping process: - -s SEED_LENGHT, --seed=SEED_LENGHT - Manually enter the lenght of the seed used for reads - in the mapping process (Default: 20). - - Lenght of the seed used for reads in the mapping process: - -d SUROUNDING_LENGHT, --surrounding=SUROUNDING_LENGHT - Manually enter the lenght of the surrounding used to - merge close peaks in the analysis process (Default: 20). - - Host genome in fasta format: - -g INPUT_FILE, --host=INPUT_FILE - Reference host genome as unique contig in fasta format - Warning: increase drastically process time - - Core processor number to use: - -c CORE_NBR, --core=CORE_NBR - Number of core processor to use (Default: 1). - - Define phage mean coverage: - -m MEAN_NBR, --mean=MEAN_NBR - Phage mean coverage to use (Default: 250). - - Software run test: - -t TEST_VALUE, --test=TEST_VALUE - TEST_VALUE=C5 : Test run for a 5' cohesive end (e.g. Lambda) - TEST_VALUE=C3 : Test run for a 3' cohesive end (e.g. HK97) - TEST_VALUE=DS : Test run for a short Direct Terminal Repeats end (e.g. T7) - TEST_VALUE=DL : Test run for a long Direct Terminal Repeats end (e.g. T5) - TEST_VALUE=H : Test run for a Headful packaging (e.g. P1) - TEST_VALUE=M : Test run for a Mu-like packaging (e.g. Mu) - - -OUTPUT FILES -========== - - (i) Report (.pdf) - - (ii) Statistical table (.csv) - - (iii) Sequence files (.fasta) - - -CONTACT -======= - -Julian Garneau <julian.garneau@usherbrooke.ca> -Marc Monot <marc.monot@pasteur.fr> -David Bikard <david.bikard@pasteur.fr>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phageterm/READ_ME.txt Fri Jun 09 06:00:29 2017 -0400 @@ -0,0 +1,45 @@ +PROGRAM +======= + +This is a wrapper for PhageTerm + + +VERSION +======= + +Version 1.0.8 + + +INTRODUCTION +============ + +PhageTerm software is a tool to determine phage termini and packaging mode +from high throughput sequences that rely on the random fragmentation of DNA (e.g. +Illumina TruSeq but NOT Nextera). Phage sequencing reads from a fastq file are aligned to +the phage reference genome in order to calculate two types of coverage values (whole +genome coverage and the starting position coverage). The starting position coverage is +used to perform a detailed termini analysis. If the user provides the host sequence, reads +that do not match the phage genome are tested on the host using the same mapping function. + +Preprint manuscript: http://biorxiv.org/content/early/2017/02/16/108100 +Source Code: https://sourceforge.net/projects/phageterm. + + +PREREQUISITES +============= + +- Python 2.7.X +- matplotlib 2.0.2 +- numpy 1.11 +- pandas 0.19.1 +- sklearn 0.18.1 +- scipy 0.19.0 +- statsmodels 0.8.0 +- reportlab 3.4.0 + +CONTACT +======= + +Julian Garneau <julian.garneau@usherbrooke.ca> +Marc Monot <marc.monot@pasteur.fr> +David Bikard <david.bikard@pasteur.fr>
--- a/phageterm/_modules/functions_PhageTerm.py Fri Jun 09 04:45:21 2017 -0400 +++ b/phageterm/_modules/functions_PhageTerm.py Fri Jun 09 06:00:29 2017 -0400 @@ -682,11 +682,11 @@ res_plus = pd.DataFrame({"Position": res['Position'], "SPC_std": res['SPC_norm_plus']*100, "SPC": res['SPC_norm_plus_close']*100, "pval_gamma": res['pval_plus'] , "pval_gamma_adj": res['pval_plus_adj']}) res_minus = pd.DataFrame({"Position": res['Position'], "SPC_std": res['SPC_norm_minus']*100, "SPC": res['SPC_norm_minus_close']*100, "pval_gamma": res['pval_minus'] , "pval_gamma_adj": res['pval_minus_adj']}) - res_plus.sort_values("SPC", ascending=[0], inplace=[1]) - res_minus.sort_values("SPC", ascending=[0], inplace=[1]) + res_plus.sort_values("SPC", ascending=False, inplace=True) + res_minus.sort_values("SPC", ascending=False, inplace=True) - res_plus.reset_index(drop=True, inplace=[1]) - res_minus.reset_index(drop=True, inplace=[1]) + res_plus.reset_index(drop=True, inplace=True) + res_minus.reset_index(drop=True, inplace=True) return res, res_plus, res_minus @@ -716,7 +716,7 @@ """Return significant peaks over a limit""" table_pvalue = table.loc[lambda df: df.pval_gamma_adj < pvalue,:] table_pvalue_limit = table_pvalue.loc[lambda df: df.SPC > limit,:] - table_pvalue_limit.reset_index(drop=True, inplace=[1]) + table_pvalue_limit.reset_index(drop=True, inplace=True) return table_pvalue_limit @@ -1213,7 +1213,7 @@ else: # Texte axes.text(0.4, 0.7, r"NEW", fontsize=50, fontweight='bold') - axes.text(0.48, 0.3, r"!", fontsize=200, fontweight='bold') + axes.text(0.44, 0.3, r"!", fontsize=200, fontweight='bold') # Draw graph if draw: @@ -1449,6 +1449,11 @@ ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(P_seqcoh)) + ' bp</font></i>' report.append(Paragraph(ptext, styles["Left"])) + # Multiple / Multiple (Nextera) + if P_left == "Multiple" and P_right == "Multiple": + ptext = '<i><font size=12>*This results could be due to a non-random fragmented sequence (e.g. 
Nextera)</font></i>' + report.append(Paragraph(ptext, styles["Left"])) + # Concatermer elif P_class[:7] == "Headful" and paired != "": ptext = '<i><font size=12>*concatemer estimation: ' + str(P_concat) + '</font></i>'