Mercurial > repos > dereeper > pangenome_explorer
diff Snakemake_files/Snakefile_wget_panaroo_heatmap_upset_COG @ 3:e42d30da7a74 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:52:25 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Snakemake_files/Snakefile_wget_panaroo_heatmap_upset_COG Thu May 30 11:52:25 2024 +0000 @@ -0,0 +1,106 @@ +import glob +import os +import shutil + +with open("genbank_ids") as f: + SAMPLES = f.read().splitlines() + +with open("genbank_files") as f: + for line in f.readlines(): + cmd = "grep 'ACCESSION' "+line + returned_value = subprocess.getoutput(cmd) + words = returned_value.split() + SAMPLES.append(words[1]) + +rule final: + input: + "pav_matrix.csv", + "GCskew.txt", + "pav_matrix.tsv", + "heatmap.svg", + "cog_output.txt" + +rule wget: + input: + "genbank_ids" + output: + expand("data/{sample}.fasta", sample=SAMPLES), + expand("data/{sample}.gb", sample=SAMPLES) + shell: + """ + perl wget.pl {input} data + """ + +rule gcskew: + input: + "data/{sample}.fasta" + output: + "data/{sample}.fasta.gcskew.txt" + shell: + """ + python3 SkewIT/src/gcskew.py -i {input} -o {input}.gcskew.txt -k 1000 -w 1000 + """ + +rule concat_gcskew: + input: + expand("data/{sample}.fasta.gcskew.txt", sample=SAMPLES) + output: + out2="GCskew.txt" + shell: + """ + cat {input} >>{output.out2} + """ + +rule genbank2gff3: + input: + "data/{sample}.gb" + output: + gff1="data/{sample}.gb.gff", + gff2="data/{sample}.gb.rmdup.gff" + shell: + """ + perl bp_genbank2gff3.pl -o data {input} + perl remove_duplicates_in_gff.pl {output.gff1} {output.gff2} + """ + +rule panaroo: + input: + expand("data/{sample}.gb.rmdup.gff", sample=SAMPLES) + output: + pav="pav_matrix.csv" + shell: + """ + mkdir panaroo_outdir + panaroo --clean-mode strict -o panaroo_outdir -i data/*gb.rmdup.gff + cp -rf panaroo_outdir/gene_presence_absence_roary.csv {output.pav} + """ + +rule convert_matrix: + input: + pav="pav_matrix.csv" + output: + "pav_matrix.tsv" + shell: + """ + perl ConvertPanarooMatrix.pl data pav_matrix.csv pav_matrix.tsv data/strains.txt + """ + +rule heatmap_upset: + input: + pav="pav_matrix.tsv" + output: + "heatmap.svg" + shell: + """ + perl GenerateHeatmapFromPAV.pl pav_matrix.tsv heatmap.svg + """ + +rule cog: + input: + pav="pav_matrix.tsv" + output: + "cog_output.txt" + shell: + """ + perl GetCogOfCluster.pl pav_matrix.tsv data {output} + """