changeset 7:d48bdee70f20 draft

Uploaded
author mmonot
date Fri, 09 Jun 2017 06:00:29 -0400
parents 8c56dd9c4b6e
children 2b1a4c2c3b83
files phageterm/._.DS_Store phageterm/._PhageTerm.py phageterm/._PhageTerm.xml phageterm/._READ_ME.txt phageterm/README.txt phageterm/READ_ME.txt phageterm/_modules/._functions_PhageTerm.py phageterm/_modules/functions_PhageTerm.py
diffstat 8 files changed, 56 insertions(+), 135 deletions(-) [+]
line wrap: on
line diff
Binary file phageterm/._.DS_Store has changed
Binary file phageterm/._PhageTerm.py has changed
Binary file phageterm/._PhageTerm.xml has changed
Binary file phageterm/._READ_ME.txt has changed
--- a/phageterm/README.txt	Fri Jun 09 04:45:21 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,129 +0,0 @@
-PROGRAM
-=======
-
-PhageTerm.py - run as command line in a shell
-
-
-VERSION
-=======
-
-Version 1.0.8
-
-
-INTRODUCTION
-============
-
-PhageTerm software is a tool to determine phage termini and packaging mode
-from high throughput sequences that rely on the random fragmentation of DNA (e.g. 
-Illumina TruSeq). Phage sequencing reads from a fastq file are aligned to the phage 
-reference genome in order to calculate two types of coverage values (whole genome coverage 
-and the starting position coverage). The starting position coverage is used to perform a 
-detailed termini analysis. If the user provides the host sequence, reads that do not 
-match the phage genome are tested on the host using the same mapping function.
-
-The PhageTerm program and information is available at https://sourceforge.net/projects/phageterm/
-
-A Galaxy wrapper version is also available at https://galaxy.pasteur.fr
-
-
-PREREQUISITES
-=============
-
-Unix/Linux
-
-- Python      2.7
-- matplotlib  2.0.2
-- numpy       1.11
-- pandas      0.19.1
-- sklearn     0.18.1
-- scipy       0.19.0
-- statsmodels 0.0.0
-- reportlab   3.4.0
-
-
-COMMAND LINE
-============
-
-
-	./PhageTerm.py -f reads.fastq -r phage_sequence.fasta [-n phage_name -p reads_paired 
-	-s seed_lenght -d surrounding -t installation_test -c nbr_core -g host.fasta 
-	(warning increase process time)]
-
-    
-	Help:   
-    
-        ./PhageTerm.py -h
-        ./PhageTerm.py --help
-    
-    Options:
-
-	Raw reads file in fastq format:
-    -f INPUT_FILE, --fastq=INPUT_FILE
-                        Fastq reads 
-                        (NGS sequences from random fragmentation DNA only, 
-                        e.g. Illumina TruSeq)
-                        
-	Paired raw reads file in fastq format:
-    -p INPUT_FILE, --paired=INPUT_FILE
-                        Paired fastq reads 
-                        (NGS sequences from random fragmentation DNA only, 
-                        e.g. Illumina TruSeq)                       
-                        
-	Phage genome in fasta format:
-    -r INPUT_FILE, --ref=INPUT_FILE
-                        Reference phage genome as unique contig in fasta format
-
-	Name of the phage being analyzed by the user:
-    -n PHAGE_NAME, --phagename=PHAGE_NAME
-                        Manually enter the name of the phage being analyzed.
-                        Used as prefix for output files.
-
-	Length of the seed used for reads in the mapping process:
-    -s SEED_LENGHT, --seed=SEED_LENGHT
-                        Manually enter the length of the seed used for reads
-                        in the mapping process (Default: 20).
-
-	Length of the surrounding region used to merge close peaks:
-    -d SUROUNDING_LENGHT, --surrounding=SUROUNDING_LENGHT
-                        Manually enter the length of the surrounding region used to
-                        merge close peaks in the analysis process (Default: 20).
-
-	Host genome in fasta format:
-    -g INPUT_FILE, --host=INPUT_FILE
-                        Reference host genome as unique contig in fasta format
-                        Warning: drastically increases processing time
-
-	Number of processor cores to use:
-    -c CORE_NBR, --core=CORE_NBR
-                        Number of processor cores to use (Default: 1).
-                        
-	Define phage mean coverage:
-    -m MEAN_NBR, --mean=MEAN_NBR
-                        Phage mean coverage to use (Default: 250).                        
-                                       
-	Software run test:
-    -t TEST_VALUE, --test=TEST_VALUE
-                        TEST_VALUE=C5   : Test run for a 5' cohesive end (e.g. Lambda)                        
-               			TEST_VALUE=C3   : Test run for a 3' cohesive end (e.g. HK97)
-               			TEST_VALUE=DS   : Test run for a short Direct Terminal Repeats end (e.g. T7)
-               			TEST_VALUE=DL   : Test run for a long Direct Terminal Repeats end (e.g. T5)
-               			TEST_VALUE=H    : Test run for a Headful packaging (e.g. P1)
-               			TEST_VALUE=M    : Test run for a Mu-like packaging (e.g. Mu)
-               
-                        
-OUTPUT FILES
-============
-
-	(i) Report (.pdf)
-	
-	(ii) Statistical table (.csv) 
-
-	(iii) Sequence files (.fasta)
-	
-
-CONTACT
-=======
-
-Julian Garneau <julian.garneau@usherbrooke.ca>
-Marc Monot <marc.monot@pasteur.fr>
-David Bikard <david.bikard@pasteur.fr>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phageterm/READ_ME.txt	Fri Jun 09 06:00:29 2017 -0400
@@ -0,0 +1,45 @@
+PROGRAM
+=======
+
+This is a wrapper for PhageTerm
+
+
+VERSION
+=======
+
+Version 1.0.8
+
+
+INTRODUCTION
+============
+
+PhageTerm software is a tool to determine phage termini and packaging mode
+from high throughput sequences that rely on the random fragmentation of DNA (e.g. 
+Illumina TruSeq but NOT Nextera). Phage sequencing reads from a fastq file are aligned to 
+the phage reference genome in order to calculate two types of coverage values (whole 
+genome coverage and the starting position coverage). The starting position coverage is 
+used to perform a detailed termini analysis. If the user provides the host sequence, reads 
+that do not match the phage genome are tested on the host using the same mapping function.
+
+Preprint manuscript: http://biorxiv.org/content/early/2017/02/16/108100
+Source Code: https://sourceforge.net/projects/phageterm/
+
+
+PREREQUISITES
+=============
+
+- Python      2.7.X
+- matplotlib  2.0.2
+- numpy       1.11
+- pandas      0.19.1
+- sklearn     0.18.1
+- scipy       0.19.0
+- statsmodels 0.8.0
+- reportlab   3.4.0	
+
+CONTACT
+=======
+
+Julian Garneau <julian.garneau@usherbrooke.ca>
+Marc Monot <marc.monot@pasteur.fr>
+David Bikard <david.bikard@pasteur.fr>
Binary file phageterm/_modules/._functions_PhageTerm.py has changed
--- a/phageterm/_modules/functions_PhageTerm.py	Fri Jun 09 04:45:21 2017 -0400
+++ b/phageterm/_modules/functions_PhageTerm.py	Fri Jun 09 06:00:29 2017 -0400
@@ -682,11 +682,11 @@
     res_plus  = pd.DataFrame({"Position": res['Position'], "SPC_std": res['SPC_norm_plus']*100,  "SPC": res['SPC_norm_plus_close']*100,  "pval_gamma": res['pval_plus'] ,  "pval_gamma_adj": res['pval_plus_adj']})
     res_minus = pd.DataFrame({"Position": res['Position'], "SPC_std": res['SPC_norm_minus']*100, "SPC": res['SPC_norm_minus_close']*100, "pval_gamma": res['pval_minus'] , "pval_gamma_adj": res['pval_minus_adj']})
     
-    res_plus.sort_values("SPC",  ascending=[0], inplace=[1])
-    res_minus.sort_values("SPC", ascending=[0], inplace=[1])
+    res_plus.sort_values("SPC",  ascending=False, inplace=True)
+    res_minus.sort_values("SPC", ascending=False, inplace=True)
     
-    res_plus.reset_index(drop=True,  inplace=[1])
-    res_minus.reset_index(drop=True, inplace=[1])
+    res_plus.reset_index(drop=True,  inplace=True)
+    res_minus.reset_index(drop=True, inplace=True)
     
     return res, res_plus, res_minus
 
@@ -716,7 +716,7 @@
     """Return significant peaks over a limit"""
     table_pvalue       = table.loc[lambda df: df.pval_gamma_adj < pvalue,:]
     table_pvalue_limit = table_pvalue.loc[lambda df: df.SPC > limit,:]
-    table_pvalue_limit.reset_index(drop=True, inplace=[1])
+    table_pvalue_limit.reset_index(drop=True, inplace=True)
     return table_pvalue_limit
 
 
@@ -1213,7 +1213,7 @@
     else:
         # Texte
         axes.text(0.4, 0.7, r"NEW", fontsize=50, fontweight='bold')
-        axes.text(0.48, 0.3, r"!", fontsize=200, fontweight='bold')
+        axes.text(0.44, 0.3, r"!", fontsize=200, fontweight='bold')
 
     # Draw graph
     if draw:
@@ -1449,6 +1449,11 @@
             ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(P_seqcoh)) + ' bp</font></i>'
         report.append(Paragraph(ptext, styles["Left"]))
 
+    # Multiple / Multiple (Nextera)
+    if P_left == "Multiple" and P_right == "Multiple":
+        ptext = '<i><font size=12>*This results could be due to a non-random fragmented sequence (e.g. Nextera)</font></i>'
+        report.append(Paragraph(ptext, styles["Left"]))
+
     # Concatermer
     elif P_class[:7] == "Headful" and paired != "":
         ptext = '<i><font size=12>*concatemer estimation: ' + str(P_concat) + '</font></i>'