# HG changeset patch
# User davidvanzessen
# Date 1482851481 18000
# Node ID d77d4700fd0ab7102f886daa55f58ccdfb6c9d58
# Parent b360a373835fb238d7afa6f80880785a166e4720
Uploaded
diff -r b360a373835f -r d77d4700fd0a imgt_concatenate.sh
--- a/imgt_concatenate.sh Thu Nov 24 10:27:14 2016 -0500
+++ b/imgt_concatenate.sh Tue Dec 27 10:11:21 2016 -0500
@@ -33,16 +33,30 @@
outdir=$2
start_line=$3 #line # to start at, 2 to skip header
id=$4
- cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/1_Summary.txt"
- cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt"
- cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt"
- cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt"
- cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt"
- cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/6_Junction.txt"
- cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt"
- cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt"
- cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt"
- cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt"
+ if [[ "${start_line}" == "1" ]] ; then
+ cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/1_Summary.txt"
+ cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt"
+ cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt"
+ cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt"
+ cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt"
+ cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/6_Junction.txt"
+ cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt"
+ cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt"
+ cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt"
+ cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>1) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt"
+ else
+ cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/1_Summary.txt"
+ cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt"
+ cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt"
+ cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt"
+ cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt"
+ cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/6_Junction.txt"
+ cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt"
+ cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt"
+ cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt"
+ cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -F $'\t' -v id=$id 'BEGIN {OFS = FS} { if(NR>0) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt"
+ fi
+
}
echo "Unpacking IMGT file 1.."
@@ -68,6 +82,8 @@
i=$((i+2))
done
+echo "`head $workdir/output/1_Summary.txt`"
+
echo "Creating new IMGT zip"
cd "$workdir/output"
diff -r b360a373835f -r d77d4700fd0a imgt_concatenate.xml
--- a/imgt_concatenate.xml Thu Nov 24 10:27:14 2016 -0500
+++ b/imgt_concatenate.xml Tue Dec 27 10:11:21 2016 -0500
@@ -22,7 +22,7 @@
-
+
.+
@@ -30,7 +30,7 @@
-
+