###### This is an R code for Conditional Density Single-Index Approximation and Projection Pursuit Conditional Density Estimation (PPCDE) ###### This code requires calling the functions defined in the file "numerical optimisation.txt" # The kernel function ker <- function(x) { drop(pmax(35/32*(1-x^2)^3,0)) } # tranforming polar coordinates to cartesian coordinates polar2cartes <- function(theta) { d <- length(theta)+1 x <- rep(1,d) for (i in 1:(d-1)) { x[i] <- x[i] * cos(theta[i]) x[(i+1):d] <- x[(i+1):d] * sin(theta[i]) } drop(x) } # tranforming cartesian coordinates to polar coordinates cartes2polar <- function(x) { x <- x / sum(x^2)^0.5 d <- length(x)-1 theta <- x[1:d] for (i in 1:(d-1)) { theta[i] <- acos(theta[i]) theta[(i+1):d] <- theta[(i+1):d] / sin(theta[i]) } theta[d] <- atan(x[d+1]/x[d]) theta[which(is.nan(theta))] <- 0 drop(theta) } funk <- function(theta,data) { n <- data[[1]] h <- data[[2]] y <- data[[3]] x <- data[[4]] G <- data[[5]] theta <- polar2cartes(theta) L <- Likelihood_point(n,h,y,theta%*%x,G) drop(-L) } maximization_BFGS <- function(n,h,y,x,p,G) { p <- cartes2polar(p) ftol <- 1e-9 itmax <- 50 pdelta <- 0.0005 data <- list(n,h,y,x,G) result <- fun.dfpmin(p,funk,data,1,itmax,ftol,pdelta) theta_hat <- polar2cartes(result[2:length(result)]) drop(c(result[1],theta_hat)) } Likelihood_point <- function(n,h,Y,X,G,rand) { Kx <- array(0,c(n,n)) Ky <- array(0,c(n,n)) for (i in 1:(n-1)) { Ky[i,(i+1):n] <- ker((y[i]-y[(i+1):n])/h) Ky[(i+1):n,i] <- Ky[i,(i+1):n] Kx[i,(i+1):n] <- ker((X[i]-X[(i+1):n])/h) Kx[(i+1):n,i] <- Kx[i,(i+1):n] } f_hat <- colSums(Ky * Kx) / rowSums(G * Kx) w <- which(rowSums(G*Kx)>1e-3 & colSums(Ky*Kx)>1e-3) drop( sum(log(f_hat[w]))/length(w) ) -log(h) } ############ # Conditional probability density approximation via Single-Index or PPCDE approximation # The function returns the conditional density of y=y0 given d-vector x=x0 # y0 - a real number or a vector of real numbers at which to evaluate the conditional density # x0 - a vector of dimension d # y and x is the data used for estimation consisted of n observations (y,x) # y - a vector of length n, x - nXd matrix # M - maximum number of projections >= 1. For single-Index model choose M=1 PPCDE <- function(y,x,y0,x0,M) { n <- length(y) dimen <- dim(x)[2] S <- c(seq(9,3,by=-1.5)) # normalizing the data to have variance=1 mmx <- colSums(x)/n x <- (x - t(replicate(n,mmx))) x0 <- x0 - mmx mmy <- mean(y) ssx <- eigen(t(x) %*% x/n ) ssx <- ssx$vectors %*% diag(ssx$values^(-0.5)) %*% t(ssx$vectors) ssy <- sd(y) x <- x %*% ssx x0 <- c(x0 %*% ssx) y <- (y - mmy) / ssy y0 <- (y0 - mmy) / ssy x <- t(x) G <- array(1/n,c(n,n)) # initial zero's model over all observations is the empirical distribution over all observations cpdf <- rep(1,length(y0)) # initial zero's model for y0 given x0 (any non-zero) H <- 3 * n^(-1/6) Ky <- array(0,c(n,n)) for (i in 1:n) { Ky[i,i:n] <- ker((y[i]-y[i:n])/H) Ky[i:n,i] <- Ky[i,i:n] } for (proj in 1:M) { ########### 1nd stage of estimation - estimating the orienation vector theta_hat theta_bar <- solve(ssx) %*%c(1,rep(0,dimen-1)) for (const in S) { h <- const * n^(-1/4) hat <- maximization_BFGS(n,h,y,x,theta_bar,G) theta_hat <- c(hat[2:length(hat)]) theta_hat <- theta_hat/(sum(c(theta_hat)^2)^0.5) if ( abs(theta_hat %*% theta_bar) > 0.99) { break } theta_bar <- theta_hat } cat("\n \n Proective direction ", proj, " = ", c(ssx %*% theta_hat) / sum(c(ssx %*% theta_hat)^2)^0.5, "\n\n") ########### 2nd stage of estimation - estimating the conditional density H <- 3 * n^(-1/6) Kx <- array(0,c(n,n)) for (i in 1:n) { Kx[i,i:n] <- ker(c(theta_hat %*% (x[,i]-x[,i:n]))/H) Kx[i:n,i] <- Kx[i,i:n] } G <- G * pmax(Ky%*%Kx,1e-3)/pmax(G%*%Kx,1e-3) / H ## updating the model over all observations H <- 3 * n^(-1/6) Ky0 <- array(0,c(n,length(y0))) for (i in 1:length(y0)) { Ky0[,i] <- ker((y-y0[i])/H) } #Ky0 <- ker((y-y0)/H) Kx0 <- ker(theta_hat %*% (x-x0)/H) ## updating the model for y0 given x0 cpdf <- cpdf * colSums(replicate(length(y0),Kx0) * Ky0) / pmax(sum(cpdf*replicate(length(y0),Kx0)),1e-5) / H } cpdf <- cpdf / ssy drop(cpdf) }