#Below follows the R code (functions) required to #run the examples of the paper #A statistical framework for the interpretation of mtDNA mixtures: forensic and medical applications #Thore Egeland and Antonio Salas #Sept 16, 2011 ##input #mix.sites denote sites displaying mix, notation is as #in file Thore_mixtures_test,txt from Toņo, i.e., #Iberia 21 093 #Iberia 22 093 189 293 #Iberia 23 093 224 311 #The prefix X is used since this is default when data is read by R and #since both 093 and 93 is used and some trick is needed to distinguish these # dev.rcs denotes non mixing sites which deviates from crs # data is database on binary format, excerpts # TAG X0 X002 X025 X026 # 1 1 1 0 0 0 # 2 2 1 0 0 0 # #crs crs cambridge revised sequence, first entry id (not used) #output is decomposition of evidence (if any), with likelihoods and list of haplotypes mtmix2=function(mix.sites=c("X093","X189","X293"),dev.crs=c("X224","X311"),data=dat,crs=dat[1,],n.cont=2){ if(n.cont<2) return("No contributors n.cont must be > 1") nn=colnames(data) if(length(mix.sites)==0) return("Error. No mixing sites") if(length(mix.sites)>=length(crs)-1) return("Error. All sites are mixing") if(!all(c(dev.crs,mix.sites)%in%nn)) return("Error. Non existing sites provided") i.mix=(1:dim(data)[2])[nn%in%mix.sites] ##index of mixing sites stain=crs if(length(dev.crs)>0){ index.dev.crs=sort((1:dim(data)[2])[nn%in%dev.crs]) stain[index.dev.crs]=1-stain[index.dev.crs] } res=mtmix1(data=data,stain=stain,i.mix=i.mix,n.cont=n.cont) if(all(res==-1)) return("Not possible mixture based on database") bb=length(res)/(4+n.cont-2) if(as.integer(bb)/bb != 1) return("Not possible mixture or possible for fewer contributors") res.lik=data.frame(matrix(res,ncol=4+n.cont-2)) colnames(res.lik)=c(paste("haplo",1:n.cont,sep=""),"lik","loglik") haplo=sort(unique(as.integer(unlist(res.lik[,1:n.cont])))) #the unique haplotypes cc=makeHaploList(data=data,haplo=haplo,crs=crs) list.haplo=cc$list.haplo list(res.lik=res.lik,haplotypes=list.haplo,freq.haplotypes=cc$freq) } makeHaploList=function(data=NULL,haplo=NULL,crs=NULL){ d1=dim(data)[1] d2=dim(data)[2] dh=length(haplo) list.haplo=vector("list",dh) for (i in 1:dh){ index.haplo=(1:d1)[data[,1]%in%haplo[i]] this.haplo=data[index.haplo,] if(all(this.haplo[,-1]==crs[-1])) this.haplo=data.frame(ID=data[index.haplo,1]) else { index.sites=(1:d2)[this.haplo!=crs] this.haplo=data[index.haplo,index.sites] } list.haplo[[i]]=this.haplo } foo=count.haplo2.f(data=data) freq=data.frame(ID=foo$ID,freq=foo$out.freq) freq=freq[freq[,1]%in%haplo,] list(list.haplo=list.haplo,freq=freq) } mixLik=function(Mset=M,data=dat){ aa=count.haplo2.f(data) id=aa$ID if(!is.matrix(Mset)){ Mset=c(Mset) lik=prod(aa[id%in%Mset,4]) res=c(haplo=Mset,lik=lik,loglik=log(lik)) } if(is.matrix(Mset)){ d1=dim(Mset)[1] lik=rep(-1,d1) for (i in 1:d1) lik[i]=prod(aa[id%in%Mset[i,],4]) res=cbind(haplo=Mset,lik=lik,loglik=log(lik)) } res } mtmix1=function(data=dat,stain=dat[21,],i.mix=c(2,27),n.cont=2){ r.cand=findCand2(data=data,stain=stain,i.mix=i.mix)$r.cand if(dim(r.cand)[1]<=1) return(-1) if(dim(r.cand)[1]==2) M=r.cand[,1] else M=checkCand2(cand=r.cand,n.cont=n.cont) if(length(M)==0) return(-1) mixLik(Mset=M,data=data) } findCand2=function(data=dat,stain=dat[21,],i.mix=c(2,27),n.cont=2){ ##Input #dat database on standard binary format with id as first column #stain identifies trace. Mix sites are not used and may be set arbitrary #i.mix sites showing mix index=duplicated(data[,-1]) r.dat=data[!index,] keep=apply(r.dat[,-c(1,i.mix)],1,function(x) all(x==stain[1,-c(1,i.mix)])) list(M=r.dat[keep,1],cand=r.dat[keep,],r.cand=r.dat[keep,c(1,i.mix)]) } count.haplo2.f=function (data = data1){ #first column of data is now ID bb <- apply(data[,-1], 1, foo <- function(x) paste(x, sep = "", collapse = ".")) ord = order(bb) bb = sort(bb) cc = rle(bb) cc2 = as.integer(rep(cc$l, cc$l)) out = data.frame(haplo = bb, count = cc2, freq = cc2/dim(data)[1]) data.frame(ID=data[,1],out=out[order(ord), ]) } checkCand2=function(cand=r.cand,n.cont=2){ ##Input r.cand from findCand #output pairs that may constitute mixture if(length(cand[,1])