3
|
1 import glob
|
|
2 import os
|
|
3 import shutil
|
|
4
|
|
5 with open("genbank_ids") as f:
|
|
6 SAMPLES = f.read().splitlines()
|
|
7
|
|
8 with open("genbank_files") as f:
|
|
9 for line in f.readlines():
|
|
10 cmd = "grep 'ACCESSION' "+line
|
|
11 returned_value = subprocess.getoutput(cmd)
|
|
12 words = returned_value.split()
|
|
13 SAMPLES.append(words[1])
|
|
14
|
|
15 rule final:
|
|
16 input:
|
|
17 "pav_matrix.csv",
|
|
18 "GCskew.txt",
|
|
19 "pav_matrix.tsv",
|
|
20 "heatmap.svg",
|
|
21 "cog_output.txt"
|
|
22
|
|
23 rule wget:
|
|
24 input:
|
|
25 "genbank_ids"
|
|
26 output:
|
|
27 expand("data/{sample}.fasta", sample=SAMPLES),
|
|
28 expand("data/{sample}.gb", sample=SAMPLES)
|
|
29 shell:
|
|
30 """
|
|
31 perl wget.pl {input} data
|
|
32 """
|
|
33
|
|
34 rule gcskew:
|
|
35 input:
|
|
36 "data/{sample}.fasta"
|
|
37 output:
|
|
38 "data/{sample}.fasta.gcskew.txt"
|
|
39 shell:
|
|
40 """
|
|
41 python3 SkewIT/src/gcskew.py -i {input} -o {input}.gcskew.txt -k 1000 -w 1000
|
|
42 """
|
|
43
|
|
44 rule concat_gcskew:
|
|
45 input:
|
|
46 expand("data/{sample}.fasta.gcskew.txt", sample=SAMPLES)
|
|
47 output:
|
|
48 out2="GCskew.txt"
|
|
49 shell:
|
|
50 """
|
|
51 cat {input} >>{output.out2}
|
|
52 """
|
|
53
|
|
54 rule genbank2gff3:
|
|
55 input:
|
|
56 "data/{sample}.gb"
|
|
57 output:
|
|
58 gff1="data/{sample}.gb.gff",
|
|
59 gff2="data/{sample}.gb.rmdup.gff"
|
|
60 shell:
|
|
61 """
|
|
62 perl bp_genbank2gff3.pl -o data {input}
|
|
63 perl remove_duplicates_in_gff.pl {output.gff1} {output.gff2}
|
|
64 """
|
|
65
|
|
66 rule panaroo:
|
|
67 input:
|
|
68 expand("data/{sample}.gb.rmdup.gff", sample=SAMPLES)
|
|
69 output:
|
|
70 pav="pav_matrix.csv"
|
|
71 shell:
|
|
72 """
|
|
73 mkdir panaroo_outdir
|
|
74 panaroo --clean-mode strict -o panaroo_outdir -i data/*gb.rmdup.gff
|
|
75 cp -rf panaroo_outdir/gene_presence_absence_roary.csv {output.pav}
|
|
76 """
|
|
77
|
|
78 rule convert_matrix:
|
|
79 input:
|
|
80 pav="pav_matrix.csv"
|
|
81 output:
|
|
82 "pav_matrix.tsv"
|
|
83 shell:
|
|
84 """
|
|
85 perl ConvertPanarooMatrix.pl data pav_matrix.csv pav_matrix.tsv data/strains.txt
|
|
86 """
|
|
87
|
|
88 rule heatmap_upset:
|
|
89 input:
|
|
90 pav="pav_matrix.tsv"
|
|
91 output:
|
|
92 "heatmap.svg"
|
|
93 shell:
|
|
94 """
|
|
95 perl GenerateHeatmapFromPAV.pl pav_matrix.tsv heatmap.svg
|
|
96 """
|
|
97
|
|
98 rule cog:
|
|
99 input:
|
|
100 pav="pav_matrix.tsv"
|
|
101 output:
|
|
102 "cog_output.txt"
|
|
103 shell:
|
|
104 """
|
|
105 perl GetCogOfCluster.pl pav_matrix.tsv data {output}
|
|
106 """
|