Mercurial > repos > davidvanzessen > shm_csr
changeset 41:b8ac74723ab0 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 14 Mar 2017 09:30:16 -0400 |
parents | ca2512e1e3ab |
children | 1cf60ae234b4 |
files | merge_and_filter.r wrapper.sh |
diffstat | 2 files changed, 26 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/merge_and_filter.r Thu Dec 29 07:05:45 2016 -0500 +++ b/merge_and_filter.r Tue Mar 14 09:30:16 2017 -0400 @@ -36,6 +36,11 @@ colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match") } +print("Summary analysis files columns") +print(names(summ)) + + + input.sequence.count = nrow(summ) print(paste("Number of sequences in summary file:", input.sequence.count)) @@ -70,31 +75,37 @@ filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ))) -#print("mutation analysis files columns") -#print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])])) +print("mutation analysis files columns") +print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])])) result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID") print(paste("Number of sequences after merging with mutation analysis file:", nrow(result))) -#print("mutation stats files columns") -#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])])) +print("mutation stats files columns") +print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])])) result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID") print(paste("Number of sequences after merging with mutation stats file:", nrow(result))) -#print("hotspots files columns") -#print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])])) +print("hotspots files columns") +print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])])) result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID") print(paste("Number of sequences after merging with hotspots file:", nrow(result))) +print("sequences files columns") +print(c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")) + sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")] names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq") result = merge(result, sequences, by="Sequence.ID", all.x=T) +print("sequences files columns") +print("CDR3.IMGT") + AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")] names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA") result = merge(result, AAs, by="Sequence.ID", all.x=T)
--- a/wrapper.sh Thu Dec 29 07:05:45 2016 -0500 +++ b/wrapper.sh Tue Mar 14 09:30:16 2017 -0400 @@ -421,22 +421,30 @@ if [[ "$fast" == "no" ]] ; then + + echo "---------------- baseline ----------------" echo "---------------- baseline ----------------<br />" >> $log tmp="$PWD" mkdir $outdir/baseline + echo "<center><h1>BASELINe</h1>" >> $output + header_substring="Based on CDR1, FR2, CDR2, FR3 (27:27:38:55:65:104:-)" + baseline_boundaries="27:27:38:55:65:104:-" if [[ "${empty_region_filter}" == "leader" ]] ; then baseline_boundaries="1:26:38:55:65:104:-" + header_substring="Based on FR1, CDR1, FR2, CDR2, FR3 (1:26:38:55:65:104,-)" fi + + echo "<p>${header_substring}</p></center>" >> $output mkdir $outdir/baseline/IGA_IGG_IGM if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then cd $outdir/baseline/IGA_IGG_IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" else echo "No sequences" > "$outdir/baseline.txt" fi