Mercurial > repos > dereeper > pangenome_explorer
changeset 2:97e4e3e818b6 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:48:09 +0000 |
parents | 032f6b3806a3 |
children | e42d30da7a74 |
files | PanExplorer.xml |
diffstat | 1 files changed, 111 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PanExplorer.xml Thu May 30 11:48:09 2024 +0000 @@ -0,0 +1,111 @@ +<tool id="PanExplorer2" name="PanExplorer2" version="2.0"> + <description> Bacterial pan-genome analysis </description> + <requirements> + <!-- + <requirement type="package" version="2.2.26">blast-legacy</requirement> + <requirement type="package" version="7.480">mafft</requirement> + <requirement type="package" version="14.137">mcl</requirement> + <requirement type="package" version="3.697">phylip</requirement> + <requirement type="package" version="1.7.2">perl-bioperl</requirement> + --> + <requirement type="package" version="2.4.2">singularity</requirement> + + </requirements> + + <command><![CDATA[ +module load system/singularity/3.6.0; +export PANEX_PATH=${__tool_directory__}; + +perl ${__tool_directory__}/Perl/generateConfig.pl '$private_genomes' '$input' config.yaml '$private_genomes_fasta'; + +cat config.yaml >$logfile; + +if [ -f \$PANEX_PATH/panexplorer.sif ]; then; else; wget -O \$PANEX_PATH/panexplorer.sif https://panexplorer.southgreen.fr/singularity/panexplorer.sif; fi; + + + +#if $software=="pgap" +sed "s/identity\=80/identity\=$min_identity/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_PGAP_heatmap_upset_COG >snakefile; +#else if $software=="roary" +sed "s/identity\=80/identity\=$min_identity/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_roary_heatmap_upset_COG >snakefile; +#else if $software=="orthofinder" +sed "s/identity\=80/identity\=$min_identity/g" \$PANEX_PATH/Snakemake_files/Snakefile_orthofinder_heatmap_upset >snakefile; +#else if $software=="cactus" +cp -rf \$PANEX_PATH/Snakemake_files/Snakefile_wget_cactus_heatmap_upset_COG2 snakefile; +#else if $software=="pggb" +sed "s/identity\=30/identity\=$min_identity/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_pggb_heatmap_upset_COG >snakefile; +#else +sed "s/identity\=80/identity\=$min_identity/g" \$PANEX_PATH/Snakemake_files/Snakefile_wget_panacota_heatmap_upset_COG >snakefile; +#end if + +singularity exec \$PANEX_PATH/panexplorer.sif snakemake --cores 1 -s snakefile >>$logfile 2>&1; + + +cp -rf outputs/upsetr.svg $upset; +cp -rf outputs/pav_matrix.tsv $output; +cp -rf outputs/heatmap.svg.gz $heatmap; +cp -rf outputs/heatmap.svg.heatmap_plotly.html $heatmap_html; +cp -rf outputs/rarefaction_curves.txt $rarefaction_curves; +cp -rf outputs/rarefaction_curves.svg $rarefaction_curves_svg; +cp -rf outputs/heaps.tsv $heaps; +cp -rf outputs/heatmap.svg.complete.pdf.distance_matrix.txt $distance_matrix; +cp -rf outputs/heatmap.svg.complete.pdf.distance_matrix.hclust.newick $njtree; + +#if $software!="orthofinder" +cp -rf outputs/GCskew.txt $gcfile; +cp -rf outputs/cog_output.txt $cogfile; +cp -rf outputs/cog_stats.txt $outcog_stat; +cp -rf outputs/cog_stats2.txt $outcog_stat2; +cp -rf outputs/cog_of_clusters.txt $outcog_clusters; +cp -rf outputs/genomes/genes.txt $genes; +cp -rf outputs/fastani.out.matrix.complete $fastani; +cp -rf outputs/fastani.out.svg $ani_svg; +#end if + +#if $software=="pggb" +cp -rf outputs/all_genomes.vcf $vcf; +#else +touch $vcf; +#end if +]]></command> + + + <inputs> + <param name="input" type="text" multiple="true" label="List of genbank identifiers" help="Coma separated list (ex: CP000235.1,CP001079.1,CP001759.1,CP015994.2)"/> + <param name="private_genomes" type="data" format="zip" label="Zip of genbank or GFF files" optional="true"/> + <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/> + <param name="min_identity" type="text" value="80" label="Minimum percentage identity for BlastP" /> + <param type="select" name="software" label="Choose the pan-genome software"> + <option value="pgap">PGAP</option> + <option value="roary">Roary</option> + <option value="panacota">PanACoTA</option> + <option value="orthofinder">OrthoFinder</option> + <option value="cactus">Minigraph-Cactus</option> + <option value="pggb">PanGenome Graph Builder (PGGB)</option> + </param> + </inputs> + + <outputs> + <data format="txt" name="output" label="Pangenome presence absence matrix"/> + <data format="newick" name="njtree" label="PanBased NJ tree"/> + <data format="txt" name="genes" label="Genes"/> + <data format="txt" name="cogfile" label="COG assignation"/> + <data format="txt" name="gcfile" label="GC_percent"/> + <data format="svg" name="upset" label="Upset Diagram"/> + <data format="svg" name="heatmap" label="Presence Absence Heatmap"/> + <data format="html" name="heatmap_html" label="Presence Absence Heatmap interactive"/> + <data format="tabular" name="outcog_stat" label="COG category counts"/> + <data format="tabular" name="outcog_stat2" label="COG category 2 counts"/> + <data format="tabular" name="outcog_clusters" label="COG of clusters"/> + <data format="tabular" name="fastani" label="ANI" /> + <data format="svg" name="ani_svg" label="ANI heatmap" /> + <data format="txt" name="rarefaction_curves" label="Rarefaction curves data"/> + <data format="svg" name="rarefaction_curves_svg" label="Rarefaction curves"/> + <data format="txt" name="heaps" label="Heaps law alpha"/> + <data format="txt" name="distance_matrix" label="Accessory based distance matrix"/> + <data format="vcf" name="vcf" label="VCF file"/> + <data format="txt" name="logfile" label="Logfile"/> + <data format="txt" name="roary_log" label="Roary Logfile"/> +</outputs> + +</tool>