view test-data/references/01-prepro-flash.log @ 0:59bc96331073 draft default tip

planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/tree/v3.1.0 commit 08296fc88e3e938c482c631bd515b3b7a0499647
author frogs
date Thu, 28 Feb 2019 10:14:49 -0500
parents
children
line wrap: on
line source

## Application
Software: preprocess.py (version: 3.1)
Command: /home/maria/workspace/git/FROGS/FROGS_master/test/../app/preprocess.py illumina --min-amplicon-size 44 --max-amplicon-size 490 --five-prim-primer GGCGVACGGGTGAGTAA --three-prim-primer GTGCCAGCNGCNGCGG --R1-size 267 --R2-size 266 --expected-amplicon-size 420 --merge-software flash --nb-cpus 4 --mismatch-rate 0.15 --keep-unmerged --input-archive /home/maria/workspace/git/FROGS/FROGS-wrapper_dev/test-data/input/temp/test_dataset.tar.gz --output-dereplicated res/01-prepro-flash.fasta --output-count res/01-prepro-flash.tsv --summary res/01-prepro-flash.html --log-file res/01-prepro-flash.log


##Sample
R1 : res/1550052675.22_3437_01_R1.fastq
R2 : res/1550052675.22_3437_01_R2.fastq
Sample name : 01
nb seq before process : 30000
##Commands
########################################################################################################
# Join overlapping paired reads. (flash version : v1.2.11)
Command:
	flash --threads 1 --allow-outies --min-overlap 43 --max-overlap 133 --max-mismatch-density 0.15  --compress res/1550052675.22_3437_01_R1.fastq res/1550052675.22_3437_01_R2.fastq --output-directory res --output-prefix 1550052676.19_3438_01_flash 2> res/1550052676.19_3438_01_flash.stderr

Execution:
	start: 13 Feb 2019 11:11:16
	end:   13 Feb 2019 11:11:22

Results:
	nb seq paired-end assembled: 17622
########################################################################################################
# Removes reads without the 5' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -g GGCGVACGGGTGAGTAA --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 16 -o res/1550052676.19_3438_01_cutadapt_5prim_trim.fastq.gz res/1550052676.19_3438_01_flash.extendedFrags.fastq.gz > res/1550052676.19_3438_01_cutadapt_5prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:22
	end:   13 Feb 2019 11:11:25

Results:
	nb seq with 5' primer : 17622
########################################################################################################
# Removes reads without the 3' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -a GTGCCAGCNGCNGCGG --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 15 -o res/1550052676.19_3438_01_cutadapt.fastq.gz res/1550052676.19_3438_01_cutadapt_5prim_trim.fastq.gz > res/1550052676.19_3438_01_cutadapt_3prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:26
	end:   13 Feb 2019 11:11:30

Results:
	nb seq with 3' primer : 17622
########################################################################################################
# Filters amplicons without primers by length and N count. (filterSeq.py version : 1.5.0)
Command:
	filterSeq.py --force-fasta --max-N 0 --min-length 11 --max-length 457 --input-file res/1550052676.19_3438_01_cutadapt.fastq.gz --output-file res/1550052676.19_3438_01_N_and_length_filter.fasta --log-file res/1550052676.19_3438_01_N_and_length_filter_log.txt

Execution:
	start: 13 Feb 2019 11:11:30
	end:   13 Feb 2019 11:11:31

Results:
	nb seq with expected length : 17622
	nb seq without N : 17622
########################################################################################################
# Concatenate paired reads. (combine_and_split.py version : )
Command:
	combine_and_split.py  --reads1 res/1550052676.19_3438_01_flash.notCombined_1.fastq.gz --reads2 res/1550052676.19_3438_01_flash.notCombined_2.fastq.gz -c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX --combined-output res/1550052676.19_3438_01_artificial_combined.fastq.gz

Execution:
	start: 13 Feb 2019 11:11:31
	end:   13 Feb 2019 11:11:38

Results:
	nb seq paired-end assembled: 12378
########################################################################################################
# Removes reads without the 5' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -g GGCGVACGGGTGAGTAA --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 16 -o res/1550052676.19_3438_01_art_comb_cutadapt_5prim_trim.fastq.gz res/1550052676.19_3438_01_artificial_combined.fastq.gz > res/1550052676.19_3438_01_art_comb_cutadapt_5prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:39
	end:   13 Feb 2019 11:11:41

Results:
	nb seq with 5' primer : 12378
########################################################################################################
# Removes reads without the 3' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -a GTGCCAGCNGCNGCGG --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 15 -o res/1550052676.19_3438_01_art_comb_cutadapt.fastq.gz res/1550052676.19_3438_01_art_comb_cutadapt_5prim_trim.fastq.gz > res/1550052676.19_3438_01_art_comb_cutadapt_3prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:42
	end:   13 Feb 2019 11:11:45

Results:
	nb seq with 3' primer : 12378
########################################################################################################
# Filters amplicons without primers by length and N count. (filterSeq.py version : 1.5.0)
Command:
	filterSeq.py --force-fasta --max-N 0 --min-length 267 --input-file res/1550052676.19_3438_01_art_comb_cutadapt.fastq.gz --output-file res/1550052676.19_3438_01_art_N_filter.fasta --log-file res/1550052676.19_3438_01_art_N_filter_log.txt

Execution:
	start: 13 Feb 2019 11:11:45
	end:   13 Feb 2019 11:11:46

Results:
	nb seq with expected length : 12378
	nb seq without N : 12378
########################################################################################################
# Replace join tag. (combine_and_split.py version : )
Command:
	combine_and_split.py  --reads1 res/1550052676.19_3438_01_art_N_filter.fasta -s XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -c NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN --combined-output res/1550052676.19_3438_01_art_XtoN.fasta

Execution:
	start: 13 Feb 2019 11:11:46
	end:   13 Feb 2019 11:11:50

########################################################################################################
# Dereplicates sample sequences. (derepSamples.py version : 1.6.1)
Command:
	derepSamples.py --sequences-files res/1550052676.19_3438_01_N_and_length_filter.fasta res/1550052676.19_3438_01_art_XtoN.fasta --dereplicated-file res/1550052675.22_3437_01_filtered.fasta --count-file res/1550052676.19_3438_01_derep_count.tsv

Execution:
	start: 13 Feb 2019 11:11:50
	end:   13 Feb 2019 11:11:52


##Sample
R1 : res/1550052675.22_3437_02_R1.fastq
R2 : res/1550052675.22_3437_02_R2.fastq
Sample name : 02
nb seq before process : 30000
##Commands
########################################################################################################
# Join overlapping paired reads. (flash version : v1.2.11)
Command:
	flash --threads 1 --allow-outies --min-overlap 43 --max-overlap 133 --max-mismatch-density 0.15  --compress res/1550052675.22_3437_02_R1.fastq res/1550052675.22_3437_02_R2.fastq --output-directory res --output-prefix 1550052676.18_3439_02_flash 2> res/1550052676.18_3439_02_flash.stderr

Execution:
	start: 13 Feb 2019 11:11:16
	end:   13 Feb 2019 11:11:22

Results:
	nb seq paired-end assembled: 17500
########################################################################################################
# Removes reads without the 5' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -g GGCGVACGGGTGAGTAA --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 16 -o res/1550052676.18_3439_02_cutadapt_5prim_trim.fastq.gz res/1550052676.18_3439_02_flash.extendedFrags.fastq.gz > res/1550052676.18_3439_02_cutadapt_5prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:22
	end:   13 Feb 2019 11:11:25

Results:
	nb seq with 5' primer : 17500
########################################################################################################
# Removes reads without the 3' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -a GTGCCAGCNGCNGCGG --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 15 -o res/1550052676.18_3439_02_cutadapt.fastq.gz res/1550052676.18_3439_02_cutadapt_5prim_trim.fastq.gz > res/1550052676.18_3439_02_cutadapt_3prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:26
	end:   13 Feb 2019 11:11:30

Results:
	nb seq with 3' primer : 17500
########################################################################################################
# Filters amplicons without primers by length and N count. (filterSeq.py version : 1.5.0)
Command:
	filterSeq.py --force-fasta --max-N 0 --min-length 11 --max-length 457 --input-file res/1550052676.18_3439_02_cutadapt.fastq.gz --output-file res/1550052676.18_3439_02_N_and_length_filter.fasta --log-file res/1550052676.18_3439_02_N_and_length_filter_log.txt

Execution:
	start: 13 Feb 2019 11:11:30
	end:   13 Feb 2019 11:11:31

Results:
	nb seq with expected length : 17500
	nb seq without N : 17500
########################################################################################################
# Concatenate paired reads. (combine_and_split.py version : )
Command:
	combine_and_split.py  --reads1 res/1550052676.18_3439_02_flash.notCombined_1.fastq.gz --reads2 res/1550052676.18_3439_02_flash.notCombined_2.fastq.gz -c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX --combined-output res/1550052676.18_3439_02_artificial_combined.fastq.gz

Execution:
	start: 13 Feb 2019 11:11:31
	end:   13 Feb 2019 11:11:38

Results:
	nb seq paired-end assembled: 12500
########################################################################################################
# Removes reads without the 5' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -g GGCGVACGGGTGAGTAA --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 16 -o res/1550052676.18_3439_02_art_comb_cutadapt_5prim_trim.fastq.gz res/1550052676.18_3439_02_artificial_combined.fastq.gz > res/1550052676.18_3439_02_art_comb_cutadapt_5prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:39
	end:   13 Feb 2019 11:11:42

Results:
	nb seq with 5' primer : 12500
########################################################################################################
# Removes reads without the 3' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -a GTGCCAGCNGCNGCGG --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 15 -o res/1550052676.18_3439_02_art_comb_cutadapt.fastq.gz res/1550052676.18_3439_02_art_comb_cutadapt_5prim_trim.fastq.gz > res/1550052676.18_3439_02_art_comb_cutadapt_3prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:42
	end:   13 Feb 2019 11:11:45

Results:
	nb seq with 3' primer : 12500
########################################################################################################
# Filters amplicons without primers by length and N count. (filterSeq.py version : 1.5.0)
Command:
	filterSeq.py --force-fasta --max-N 0 --min-length 267 --input-file res/1550052676.18_3439_02_art_comb_cutadapt.fastq.gz --output-file res/1550052676.18_3439_02_art_N_filter.fasta --log-file res/1550052676.18_3439_02_art_N_filter_log.txt

Execution:
	start: 13 Feb 2019 11:11:45
	end:   13 Feb 2019 11:11:46

Results:
	nb seq with expected length : 12500
	nb seq without N : 12500
########################################################################################################
# Replace join tag. (combine_and_split.py version : )
Command:
	combine_and_split.py  --reads1 res/1550052676.18_3439_02_art_N_filter.fasta -s XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -c NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN --combined-output res/1550052676.18_3439_02_art_XtoN.fasta

Execution:
	start: 13 Feb 2019 11:11:46
	end:   13 Feb 2019 11:11:50

########################################################################################################
# Dereplicates sample sequences. (derepSamples.py version : 1.6.1)
Command:
	derepSamples.py --sequences-files res/1550052676.18_3439_02_N_and_length_filter.fasta res/1550052676.18_3439_02_art_XtoN.fasta --dereplicated-file res/1550052675.22_3437_02_filtered.fasta --count-file res/1550052676.18_3439_02_derep_count.tsv

Execution:
	start: 13 Feb 2019 11:11:51
	end:   13 Feb 2019 11:11:52


##Sample
R1 : res/1550052675.22_3437_03_R1.fastq
R2 : res/1550052675.22_3437_03_R2.fastq
Sample name : 03
nb seq before process : 30000
##Commands
########################################################################################################
# Join overlapping paired reads. (flash version : v1.2.11)
Command:
	flash --threads 1 --allow-outies --min-overlap 43 --max-overlap 133 --max-mismatch-density 0.15  --compress res/1550052675.22_3437_03_R1.fastq res/1550052675.22_3437_03_R2.fastq --output-directory res --output-prefix 1550052676.18_3440_03_flash 2> res/1550052676.18_3440_03_flash.stderr

Execution:
	start: 13 Feb 2019 11:11:16
	end:   13 Feb 2019 11:11:22

Results:
	nb seq paired-end assembled: 17464
########################################################################################################
# Removes reads without the 5' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -g GGCGVACGGGTGAGTAA --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 16 -o res/1550052676.18_3440_03_cutadapt_5prim_trim.fastq.gz res/1550052676.18_3440_03_flash.extendedFrags.fastq.gz > res/1550052676.18_3440_03_cutadapt_5prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:22
	end:   13 Feb 2019 11:11:24

Results:
	nb seq with 5' primer : 17464
########################################################################################################
# Removes reads without the 3' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -a GTGCCAGCNGCNGCGG --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 15 -o res/1550052676.18_3440_03_cutadapt.fastq.gz res/1550052676.18_3440_03_cutadapt_5prim_trim.fastq.gz > res/1550052676.18_3440_03_cutadapt_3prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:25
	end:   13 Feb 2019 11:11:28

Results:
	nb seq with 3' primer : 17464
########################################################################################################
# Filters amplicons without primers by length and N count. (filterSeq.py version : 1.5.0)
Command:
	filterSeq.py --force-fasta --max-N 0 --min-length 11 --max-length 457 --input-file res/1550052676.18_3440_03_cutadapt.fastq.gz --output-file res/1550052676.18_3440_03_N_and_length_filter.fasta --log-file res/1550052676.18_3440_03_N_and_length_filter_log.txt

Execution:
	start: 13 Feb 2019 11:11:29
	end:   13 Feb 2019 11:11:30

Results:
	nb seq with expected length : 17464
	nb seq without N : 17464
########################################################################################################
# Concatenate paired reads. (combine_and_split.py version : )
Command:
	combine_and_split.py  --reads1 res/1550052676.18_3440_03_flash.notCombined_1.fastq.gz --reads2 res/1550052676.18_3440_03_flash.notCombined_2.fastq.gz -c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX --combined-output res/1550052676.18_3440_03_artificial_combined.fastq.gz

Execution:
	start: 13 Feb 2019 11:11:30
	end:   13 Feb 2019 11:11:37

Results:
	nb seq paired-end assembled: 12536
########################################################################################################
# Removes reads without the 5' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -g GGCGVACGGGTGAGTAA --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 16 -o res/1550052676.18_3440_03_art_comb_cutadapt_5prim_trim.fastq.gz res/1550052676.18_3440_03_artificial_combined.fastq.gz > res/1550052676.18_3440_03_art_comb_cutadapt_5prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:38
	end:   13 Feb 2019 11:11:40

Results:
	nb seq with 5' primer : 12536
########################################################################################################
# Removes reads without the 3' primer and removes primer sequence. (cutadapt version : 1.18)
Command:
	cutadapt -a GTGCCAGCNGCNGCGG --error-rate 0.1 --discard-untrimmed --match-read-wildcards --overlap 15 -o res/1550052676.18_3440_03_art_comb_cutadapt.fastq.gz res/1550052676.18_3440_03_art_comb_cutadapt_5prim_trim.fastq.gz > res/1550052676.18_3440_03_art_comb_cutadapt_3prim_log.txt

Execution:
	start: 13 Feb 2019 11:11:41
	end:   13 Feb 2019 11:11:43

Results:
	nb seq with 3' primer : 12536
########################################################################################################
# Filters amplicons without primers by length and N count. (filterSeq.py version : 1.5.0)
Command:
	filterSeq.py --force-fasta --max-N 0 --min-length 267 --input-file res/1550052676.18_3440_03_art_comb_cutadapt.fastq.gz --output-file res/1550052676.18_3440_03_art_N_filter.fasta --log-file res/1550052676.18_3440_03_art_N_filter_log.txt

Execution:
	start: 13 Feb 2019 11:11:44
	end:   13 Feb 2019 11:11:45

Results:
	nb seq with expected length : 12536
	nb seq without N : 12536
########################################################################################################
# Replace join tag. (combine_and_split.py version : )
Command:
	combine_and_split.py  --reads1 res/1550052676.18_3440_03_art_N_filter.fasta -s XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -c NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN --combined-output res/1550052676.18_3440_03_art_XtoN.fasta

Execution:
	start: 13 Feb 2019 11:11:45
	end:   13 Feb 2019 11:11:49

########################################################################################################
# Dereplicates sample sequences. (derepSamples.py version : 1.6.1)
Command:
	derepSamples.py --sequences-files res/1550052676.18_3440_03_N_and_length_filter.fasta res/1550052676.18_3440_03_art_XtoN.fasta --dereplicated-file res/1550052675.22_3437_03_filtered.fasta --count-file res/1550052676.18_3440_03_derep_count.tsv

Execution:
	start: 13 Feb 2019 11:11:49
	end:   13 Feb 2019 11:11:51



##Sample
All
##Commands
########################################################################################################
# Dereplicates together sequences from several samples. (derepSamples.py version : 1.6.1)
Command:
	derepSamples.py --nb-cpus 4 --size-separator ';size=' --samples-ref res/1550052675.22_3437_derep_inputs.tsv --dereplicated-file res/01-prepro-flash.fasta --count-file res/01-prepro-flash.tsv

Execution:
	start: 13 Feb 2019 11:11:52
	end:   13 Feb 2019 11:11:53