Mercurial > repos > sarahinraauzeville > star
diff sm_STAR2_V2.pl @ 6:b7ed4ee33fb6 draft default tip
Deleted selected files
author | sarahinraauzeville |
---|---|
date | Mon, 09 Dec 2019 10:55:33 -0500 |
parents | c4fc8ff6e280 |
children |
line wrap: on
line diff
#!/usr/bin/perl -w

# usage : perl sm_STAR.pl <read1.fastq.gz> <read2.fastq.gz>
#   NOTE(review): the script actually reads long options (--runThreadN,
#   --genomeDir, --readFilesIn1, ...) through Getopt::Long, see GetOptions below.
# 10/02/2014 - Wrapper for RNAseq data processing
# Sarah Maman
# Copyright (C) 2014 INRA
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
use strict;
use warnings;
use File::Basename;
use Getopt::Long;
use lib "$ENV{'MY_GALAXY_DIR'}";
use GalaxyPath;

my $cfg  = GalaxyPath->new( -file => $ENV{"GALAXY_CONFIG_FILE"} );
my $PATH = $cfg->my_path( 'workPath',  'MYWORKSPACE' );
my $STAR = $cfg->my_path( 'toolsPath', 'STAR_PATH' );

my $Nthreads;
my $genome_path;
my $reads_selector;
my $input_read;
my $Read1fastqgz;
my $Read2fastqgz;
my $alignIntronMin;
my $alignIntronMax;
my $outFilterMismatchNmax;
my $orientation;
my $refownfastaref;
my $refselector;
my $refowngtf;
my $compress;
my $cufflinks;
my $outputfile;
my $outputfileT;
my $outputlogSJ;
my $outputlogfinal;

Getopt::Long::Configure( 'no_ignorecase', 'bundling' );
GetOptions(
    'runThreadN=i'            => \$Nthreads,
    'genomeDir=s'             => \$genome_path,
    'refselector=s'           => \$refselector,
    'refownfastaref=s'        => \$refownfastaref,
    'refowngtf=s'             => \$refowngtf,
    'compress=s'              => \$compress,
    'cufflinks=s'             => \$cufflinks,
    'readsselector=s'         => \$reads_selector,
    'readFilesIn1=s'          => \$Read1fastqgz,
    'readFilesIn2=s'          => \$Read2fastqgz,
    'readsinputread=s'        => \$input_read,
    'alignIntronMin=i'        => \$alignIntronMin,
    'alignIntronMax=i'        => \$alignIntronMax,
    'outFilterMismatchNmax=i' => \$outFilterMismatchNmax,
    'orientation=s'           => \$orientation,
    'outputfile=s'            => \$outputfile,
    'outputfileT=s'           => \$outputfileT,
    'outputlogfinal=s'        => \$outputlogfinal,
    'outputlogSJ=s'           => \$outputlogSJ
) or die "Usage: Error in command line arguments\n";

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Quote one argument for /bin/sh: wrap it in single quotes and escape any
# embedded single quote, so paths with spaces or metacharacters stay intact.
sub shell_quote {
    my ($arg) = @_;
    $arg =~ s/'/'\\''/g;
    return "'$arg'";
}

# Run a shell command string and report a failure on STDERR.
# Returns the raw status from system(). The script keeps going on failure,
# exactly as before; the only difference is that the failure is now visible.
sub run_logged {
    my ( $label, $cmd ) = @_;
    my $status = system $cmd;
    if ( $status == -1 ) {
        print STDERR "$label could not be started: $!\n";
    }
    elsif ( $status != 0 ) {
        print STDERR "$label failed (exit status " . ( $status >> 8 ) . ").\n";
    }
    return $status;
}

# Create $link as a symbolic link to $source and return $link.
# A stale link left by a previous run is replaced so that the current input
# is always the one that gets aligned.
sub link_input {
    my ( $source, $link ) = @_;
    unlink $link if -l $link;
    symlink( $source, $link )
        or die "Cannot create symbolic link $link -> $source: $!\n";
    return $link;
}

# Copy the first file matching $pattern to $destination with `cp -a`.
# Prints $missing_message on STDERR when nothing matches.
# NOTE(review): glob() splits its pattern on whitespace, so a working
# directory containing spaces would not be matched - confirm that workPath
# never contains any.
sub collect_output {
    my ( $pattern, $destination, $missing_message ) = @_;

    # List context: scalar-context glob() is an iterator and yields undef on
    # "no match", which made the original -e test warn on an undefined value.
    my ($found) = glob($pattern);
    if ( !defined $found || !-e $found ) {
        print STDERR $missing_message;
        return;
    }
    if ( !defined $destination ) {
        print STDERR "No destination given for $found, file not copied. \n";
        return;
    }

    # List form of system(): no shell involved, so no quoting problems.
    system( 'cp', '-a', $found, $destination ) == 0
        or print STDERR "Copy of $found to $destination failed. \n";
    return;
}

# ---------------------------------------------------------------------------
# Option validation
# ---------------------------------------------------------------------------

# Selector-like options are compared with eq below; default them to '' so an
# omitted option does not raise "uninitialized value" warnings.
for my $selector ( \$refselector, \$cufflinks, \$reads_selector, \$compress, \$orientation ) {
    ${$selector} = '' unless defined ${$selector};
}

my %required = (
    runThreadN            => $Nthreads,
    alignIntronMin        => $alignIntronMin,
    alignIntronMax        => $alignIntronMax,
    outFilterMismatchNmax => $outFilterMismatchNmax,
    outputfile            => $outputfile,
);
if ( $refselector eq "ownfasta" ) {
    $required{refownfastaref} = $refownfastaref;
    $required{refowngtf}      = $refowngtf;
}
else {
    $required{genomeDir} = $genome_path;
}
if ( $reads_selector eq "single" ) {
    $required{readsinputread} = $input_read;
}
else {
    $required{readFilesIn1} = $Read1fastqgz;
    $required{readFilesIn2} = $Read2fastqgz;
}
for my $name ( sort keys %required ) {
    defined $required{$name}
        or die "Usage: missing required option --$name\n";
}

# Example of genome index generation:
#STAR --runThreadN 4 --runMode genomeGenerate --genomeDir /work/smaman/TP_RNAseq/INDEX/ --genomeFastaFiles ITAG2.3_genomic_Ch6.fasta --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --sjdbOverhang 100

#smaman@node001 /work/smaman/TP_RNAseq $ ls -ltrah INDEX
#-rw-r--r-- 1 smaman BIOINFO 331 17 juil. 11:55 genomeParameters.txt
#-rw-r--r-- 1 smaman BIOINFO 387K 17 juil. 11:55 exonGeTrInfo.tab
#-rw-r--r-- 1 smaman BIOINFO 53K 17 juil. 11:55 geneInfo.tab
#-rw-r--r-- 1 smaman BIOINFO 151K 17 juil. 11:55 transcriptInfo.tab
#-rw-r--r-- 1 smaman BIOINFO 171K 17 juil. 11:55 exonInfo.tab
#-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.fromGTF.out.tab
#-rw-r--r-- 1 smaman BIOINFO 272K 17 juil. 11:55 sjdbInfo.txt
#-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.out.tab
#-rw-r--r-- 1 smaman BIOINFO 11 17 juil. 11:55 chrName.txt
#-rw-r--r-- 1 smaman BIOINFO 9 17 juil. 11:55 chrLength.txt
#-rw-r--r-- 1 smaman BIOINFO 11 17 juil. 11:55 chrStart.txt
#-rw-r--r-- 1 smaman BIOINFO 20 17 juil. 11:55 chrNameLength.txt
#-rw-r--r-- 1 smaman BIOINFO 47M 17 juil. 11:55 Genome
#-rw-r--r-- 1 smaman BIOINFO 360M 17 juil. 11:55 SA
#-rw-r--r-- 1 smaman BIOINFO 1,5G 17 juil. 11:55 SAindex

# Example of alignment:
#STAR --readFilesIn WTr1.fastq WTr2.fastq --genomeDir /work/smaman/TP_RNAseq/INDEX/ --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --outSAMtype BAM SortedByCoordinate --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10 --outSAMtype BAM SortedByCoordinate --runThreadN 4 --outFileNamePrefix galaxyName --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM

#-rw-r--r-- 1 smaman BIOINFO 45M 26 mars 2015 ITAG2.3_genomic_Ch6.fasta
#-rw-r--r-- 1 smaman BIOINFO 1,6M 26 mars 2015 ITAG_pre2.3_gene_models_Ch6.gtf
#-rw-r--r-- 1 smaman BIOINFO 29 26 mars 2015 ITAG2.3_genomic_Ch6.fasta.fai
#-rw-r--r-- 1 smaman BIOINFO 614 17 juil. 10:20 WTr1.fastq
#-rw-r--r-- 1 smaman BIOINFO 589 17 juil. 10:20 WTr2.fastq
#-rw-r--r-- 1 smaman BIOINFO 14K 17 juil. 11:55 Log.out
#-rw-r--r-- 1 smaman BIOINFO 35K 17 juil. 12:03 galaxyNameAligned.toTranscriptome.out.bam
#-rw-r--r-- 1 smaman BIOINFO 637 17 juil. 12:03 galaxyNameAligned.sortedByCoord.out.bam +++++++++
#-rw-r--r-- 1 smaman BIOINFO 0 17 juil. 12:03 galaxyNameSJ.out.tab ++++++++++++++++
#-rw-r--r-- 1 smaman BIOINFO 246 17 juil. 12:03 galaxyNameLog.progress.out
#-rw-r--r-- 1 smaman BIOINFO 1,7K 17 juil. 12:03 galaxyNameLog.final.out +++++++++++++++
#-rw-r--r-- 1 smaman BIOINFO 16K 17 juil. 12:03 galaxyNameLog.out

# ---------------------------------------------------------------------------
# Workspace
# ---------------------------------------------------------------------------

# NOTE(review): the two messages look swapped ($debug == 0 reports
# "Debug mode OK"). Behaviour is kept as it was - confirm the intent.
my $debug = 0;    # Debug mode
if ( $debug == 0 ) {
    print STDOUT "Debug mode OK \n";
}
else {
    $PATH = dirname($outputfile);
    print STDOUT "No debug \n";
}

# Extract the (unique) dataset number of the output so that a unique working
# directory can be created in the workspace.
my ($nb) = ( $outputfile =~ /dataset_(\d+)\.\S+$/ );

# Without this guard an unmatched name left $nb empty and the shell command
# below degenerated into `mkdir /; chmod -R 777 /`.
defined $nb
    or die "Cannot extract a dataset number from output file name '$outputfile'\n";

# Output directory created by the script, world-writable as before.
my $dirresults = "$PATH/" . $nb;
if ( !-d $dirresults ) {
    mkdir $dirresults
        or die "Cannot create working directory $dirresults: $!\n";
}
system( 'chmod', '-R', '777', $dirresults ) == 0
    or print STDERR "Cannot set permissions on $dirresults. \n";

print STDOUT "Job working directory : $dirresults \n";

my $dir_q = shell_quote($dirresults);

# ---------------------------------------------------------------------------
# Optional genome index built from the user's own FASTA / GTF
# ---------------------------------------------------------------------------

if ( $refselector eq "ownfasta" ) {
    my $index_q = shell_quote("$dirresults/INDEX");
    my $fasta_q = shell_quote($refownfastaref);
    my $gtf_q   = shell_quote($refowngtf);

    # "> file 2>&1" is the portable spelling of the former ">& file 2>&1",
    # which only bash/csh accept.
    my $cmdSTARindex =
          "(cd $dir_q && mkdir -p INDEX && chmod 777 INDEX && "
        . "$STAR --runThreadN $Nthreads --runMode genomeGenerate --genomeDir $index_q "
        . "--genomeFastaFiles $fasta_q --sjdbGTFfile $gtf_q --sjdbOverhang 100) "
        . "> ./out_Starindex.log 2>&1";
    run_logged( "STAR genome generate", $cmdSTARindex );

    # Information for the biologists
    print STDOUT "STAR Genome Generate : \n\n $cmdSTARindex \n\n ";
    $genome_path = "$dirresults/INDEX/";
}

# Extra STAR options that make the output usable by Cufflinks.
my $addcuff =
    $cufflinks eq "cuff"
    ? "--outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM "
    : "";

# ---------------------------------------------------------------------------
# Input reads: link them into the working directory under the extension
# that matches their compression.
# ---------------------------------------------------------------------------

my $is_compressed = $compress eq "compress";
my $extension     = $is_compressed ? "fastq.gz" : "fastq";
my $cat           = $is_compressed ? "--readFilesCommand zcat" : "";

my @reads;
if ( $reads_selector eq "single" ) {
    @reads = ( link_input( $input_read, "$dirresults/input_read.$extension" ) );
}
else {
    @reads = (
        link_input( $Read1fastqgz, "$dirresults/Read1.$extension" ),
        link_input( $Read2fastqgz, "$dirresults/Read2.$extension" ),
    );
}

# ---------------------------------------------------------------------------
# Alignment
# ---------------------------------------------------------------------------

# The STAR command line is the same for oriented and non-oriented reads; the
# orientation only changes the message shown to the user.
my $reads_q  = join ' ', map { shell_quote($_) } @reads;
my $genome_q = shell_quote($genome_path);
my $cmd =
      "(cd $dir_q && $STAR --runThreadN $Nthreads --genomeDir $genome_q "
    . "--readFilesIn $reads_q --outSAMtype BAM SortedByCoordinate "
    . "--alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax "
    . "--outFilterMismatchNmax $outFilterMismatchNmax $cat "
    . "--outFileNamePrefix $nb $addcuff) > ./out_Star.log 2>&1";
run_logged( "STAR alignment", $cmd );

# Information for the biologists
if ( $orientation eq "No" ) {
    print STDOUT "STAR command run on cluster without oriented reads : \n\n $cmd \n\n ";
}
else {
    print STDOUT "STAR command run on cluster with oriented reads : \n\n $cmd \n\n
		Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks <…> -library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.\n\n";
}

# If needed :
#TEST 1 : command line on vm-galaxy
#TEST 2 perl Galaxy file : perl script.pl path/to/tests/files/used/for/galaxy/perl/script out1

# Files handed back to Galaxy
#-rw-r--r-- 1 smaman BIOINFO 35K 17 juil. 12:03 galaxyNameAligned.toTranscriptome.out.bam +++++
#-rw-r--r-- 1 smaman BIOINFO 637 17 juil. 12:03 galaxyNameAligned.sortedByCoord.out.bam +++++++++
#-rw-r--r-- 1 smaman BIOINFO 0 17 juil. 12:03 galaxyNameSJ.out.tab ++++++++++++++++
#-rw-r--r-- 1 smaman BIOINFO 1,7K 17 juil. 12:03 galaxyNameLog.final.out +++++++++++++++
collect_output(
    "$dirresults/*$nb*Aligned.sortedByCoord.out.bam",
    $outputfile,
    "Aligned.sortedByCoord.out.bam file not found. \n"
);
collect_output(
    "$dirresults/*$nb*Aligned.toTranscriptome.out.bam",
    $outputfileT,
    "Aligned.toTranscriptome.out.bam file not found. \n"
);
collect_output(
    "$dirresults/$nb*SJ.out.tab",
    $outputlogSJ,
    "SJ.out.tab log file not found. \n"
);
collect_output(
    "$dirresults/$nb*Log.final.out",
    $outputlogfinal,
    "Log.final.out log file not found. \n"
);