Mercurial > repos > davidvanzessen > imgt_concatenate
comparison imgt_concatenate.sh @ 2:d77d4700fd0a draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 27 Dec 2016 10:11:21 -0500 |
parents | b360a373835f |
children |
comparison
legend: equal, deleted, inserted, replaced
1:b360a373835f | 2:d77d4700fd0a |
---|---|
31 function concat_imgt_files { | 31 function concat_imgt_files { |
32 indir=$1 | 32 indir=$1 |
33 outdir=$2 | 33 outdir=$2 |
34 start_line=$3 #line # to start at, 2 to skip header | 34 start_line=$3 #line # to start at, 2 to skip header |
35 id=$4 | 35 id=$4 |
36 cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/1_Summary.txt" | 36 if [[ "${start_line}" == "1" ]] ; then |
37 cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt" | 37 cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/1_Summary.txt" |
38 cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt" | 38 cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt" |
39 cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt" | 39 cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt" |
40 cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt" | 40 cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt" |
41 cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/6_Junction.txt" | 41 cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt" |
42 cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" | 42 cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/6_Junction.txt" |
43 cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" | 43 cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" |
44 cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt" | 44 cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" |
45 cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt" | 45 cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt" |
46 cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt" | |
47 else | |
48 cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/1_Summary.txt" | |
49 cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt" | |
50 cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt" | |
51 cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt" | |
52 cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt" | |
53 cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/6_Junction.txt" | |
54 cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" | |
55 cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" | |
56 cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt" | |
57 cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt" | |
58 fi | |
59 | |
46 } | 60 } |
47 | 61 |
48 echo "Unpacking IMGT file 1.." | 62 echo "Unpacking IMGT file 1.." |
49 imgt_unpack ${inputs[0]} "$workdir/input1" | 63 imgt_unpack ${inputs[0]} "$workdir/input1" |
50 | 64 |
66 echo "Concatenating IMGT file $1..." | 80 echo "Concatenating IMGT file $1..." |
67 concat_imgt_files "${current_dir}" "$workdir/output" 2 $id | 81 concat_imgt_files "${current_dir}" "$workdir/output" 2 $id |
68 i=$((i+2)) | 82 i=$((i+2)) |
69 done | 83 done |
70 | 84 |
85 echo "`head $workdir/output/1_Summary.txt`" | |
86 | |
71 | 87 |
72 echo "Creating new IMGT zip" | 88 echo "Creating new IMGT zip" |
73 cd "$workdir/output" | 89 cd "$workdir/output" |
74 tar cfJ "$output" * | 90 tar cfJ "$output" * |
75 | 91 |