Difference between revisions of "Lab journal"
Line 33: | Line 33: | ||
# determine pairs that have a minimum distance of below 5 A |
# determine pairs that have a minimum distance of below 5 A |
||
+ | |||
− | [[ras_ev_hotspots| Ras hotspots]] |
||
− | [[mhc_ev_hotspots| MHC I hotspots]] |
||
− | [[imm_ev_hotspots| Ig C1 hotspots]] |
||
== Calculate structural models == |
== Calculate structural models == |
||
+ | |||
+ | == Scripts == |
||
+ | |||
+ | <code type="R" |
||
+ | |||
+ | cDist <- function(a,b) { |
||
+ | |||
+ | if(abs(a-b) > 5) { |
||
+ | return(TRUE) |
||
+ | } |
||
+ | else { |
||
+ | return(FALSE) |
||
+ | } |
||
+ | } |
||
+ | |||
+ | getValid <- function(dat,col1,col2) { |
||
+ | valid <- with(dat[,c(col1,col2)], mapply(cDist,V1,V3)) |
||
+ | validPairs <- dat[valid,] |
||
+ | return(validPairs) |
||
+ | } |
||
+ | |||
+ | |||
+ | getHS <- function(a,histName="cn_score_hist.png") { |
||
+ | |||
+ | |||
+ | #get pairs with residue distance > 5 |
||
+ | valid <- with(a[,c(1,3)], mapply(cDist,V1,V3)) |
||
+ | validPairs <- a[valid,] |
||
+ | |||
+ | #order by decreasing cn value |
||
+ | ord <- order(validPairs[,6],decreasing=TRUE) |
||
+ | vSorted <- validPairs[ord,] |
||
+ | #plot score distribution |
||
+ | png(histName,width=500,height=400) |
||
+ | hist(vSorted[,6],100,col="lightblue",xlab="CN score", main="",xlim=c(-1,max(vSorted[,6]))) |
||
+ | dev.off() |
||
+ | |||
+ | |||
+ | return(vSorted) |
||
+ | } |
||
+ | |||
+ | |||
+ | getCoords <- function(hs,offset) { |
||
+ | xVal <- sapply(hs[,1],function(x){ x+offset }) |
||
+ | yVal <- sapply(hs[,3],function(x){ x+offset }) |
||
+ | return(cbind(xVal,yVal)) |
||
+ | } |
||
+ | |||
+ | mapAA <- function(dat) { |
||
+ | x <- dat[,c(1,2)] |
||
+ | y <- dat[,c(3,4)] |
||
+ | colnames(y) <- colnames(x) |
||
+ | z <- rbind(x,y) |
||
+ | map <- list() |
||
+ | for (i in 1:nrow(dat)) { |
||
+ | |||
+ | map[[z[i,1]]] <- z[i,2] |
||
+ | } |
||
+ | return(map) |
||
+ | } |
||
+ | |||
+ | normScores <- function(datSorted,l){ |
||
+ | lDat <- datSorted[1:l,] |
||
+ | AAmap <- mapAA(lDat) |
||
+ | nFactor <- mean(lDat[,6]) |
||
+ | x <- lDat[,c(1,6)] |
||
+ | y <- lDat[,c(3,6)] |
||
+ | colnames(y) <- colnames(x) |
||
+ | z <- rbind(x,y) |
||
+ | scSums <- aggregate(z[,2], by=list(z[,1]) , FUN=sum) |
||
+ | ord <- order(scSums[,2],decreasing=T) |
||
+ | scSums <- scSums[ord,] |
||
+ | |||
+ | return(scSums) |
||
+ | |||
+ | } |
||
+ | |||
+ | </code> |
Revision as of 13:23, 24 August 2013
Contents
Multiple sequence alignment
The HFE protein consists of 3 domains: alpha 1 and 2 and a Immunoglobulin C1-set domain. Therefore we found two different pfam families that both match our protein:
Class I Histocompatibility antigen, domains alpha 1 and 2 (http://pfam.sanger.ac.uk/family/PF00129) and Immunoglobulin C1-set domain (http://pfam.sanger.ac.uk/family/PF07654#tabview=tab0).
We checked how much of out proteins sequence is covered by both fimilies:
immunoglobulin
# GS HFE_HUMAN/211-294 DR PDB; 1A6Z A; 189-272;
mhc class 1
# GS HFE_HUMAN/26-202 DR PDB; 1A6Z A; 4-180;
We downloaded the alignments in fasta format. Therefore, we first formatted the alignment to fasta format and then downloaded the .txt file. The alignment were then formatted again with a2m2aln:
/usr/share/freecontact/a2m2aln -q '^HFE_HUMAN/(\d+)' --quiet < imm.txt > imm.aln /usr/share/freecontact/a2m2aln -q '^HFE_HUMAN/(\d+)' --quiet < mhc.txt > mhc.aln /usr/share/freecontact/a2m2aln -q '^RASH_HUMAN/(\d+)' --quiet < ras.txt > ras.aln
freecontact with standard parameters and evfold as output format this was done with the following command:
freecontact -o evfold < imm.aln > imm_contacts.out freecontact -o evfold < mhc.aln > mhc_contacts.out freecontact -o evfold < ras.aln > ras_contacts.out
The contact predictions were then analysed using the following pipeline
- Remove contacts that have a distance of <= 5 residues in sequence
- Order descending by CN score and plot score distribution
- Select high scoring pairs
- Load reference PDB and determine minimum atom distance between all residue pairs
- determine pairs that have a minimum distance of below 5 A
Calculate structural models
Scripts
<code type="R"
cDist <- function(a,b) {
if(abs(a-b) > 5) { return(TRUE) } else { return(FALSE) } }
getValid <- function(dat,col1,col2) { valid <- with(dat[,c(col1,col2)], mapply(cDist,V1,V3)) validPairs <- dat[valid,] return(validPairs) }
getHS <- function(a,histName="cn_score_hist.png") {
- get pairs with residue distance > 5
valid <- with(a[,c(1,3)], mapply(cDist,V1,V3)) validPairs <- a[valid,]
- order by decreasing cn value
ord <- order(validPairs[,6],decreasing=TRUE) vSorted <- validPairs[ord,]
- plot score distribution
png(histName,width=500,height=400) hist(vSorted[,6],100,col="lightblue",xlab="CN score", main="",xlim=c(-1,max(vSorted[,6]))) dev.off()
return(vSorted)
}
getCoords <- function(hs,offset) {
xVal <- sapply(hs[,1],function(x){ x+offset })
yVal <- sapply(hs[,3],function(x){ x+offset })
return(cbind(xVal,yVal))
}
mapAA <- function(dat) { x <- dat[,c(1,2)] y <- dat[,c(3,4)] colnames(y) <- colnames(x) z <- rbind(x,y) map <- list() for (i in 1:nrow(dat)) {
map[[z[i,1]]] <- z[i,2] } return(map) }
normScores <- function(datSorted,l){ lDat <- datSorted[1:l,] AAmap <- mapAA(lDat) nFactor <- mean(lDat[,6]) x <- lDat[,c(1,6)] y <- lDat[,c(3,6)] colnames(y) <- colnames(x) z <- rbind(x,y) scSums <- aggregate(z[,2], by=list(z[,1]) , FUN=sum) ord <- order(scSums[,2],decreasing=T) scSums <- scSums[ord,]
return(scSums)
}