###### This is an R code for Conditional Density Single-Index Approximation and Projection Pursuit Conditional Density Estimation (PPCDE)
###### This code requires calling the functions defined in the file "numerical optimisation.txt"

# The kernel function
ker <- function(x) {
	drop(pmax(35/32*(1-x^2)^3,0)) 
}

# tranforming polar coordinates to cartesian coordinates
polar2cartes <- function(theta) {
	d <- length(theta)+1
	x <- rep(1,d)
	for (i in 1:(d-1)) {
		x[i] <- x[i] * cos(theta[i])
		x[(i+1):d] <- x[(i+1):d] * sin(theta[i])
	}
	drop(x)
}

# tranforming cartesian coordinates to polar coordinates
cartes2polar <- function(x) {
	x <- x / sum(x^2)^0.5
	d <- length(x)-1
	theta <- x[1:d]
	for (i in 1:(d-1)) {
		theta[i] <- acos(theta[i])
		theta[(i+1):d] <- theta[(i+1):d] / sin(theta[i])
	}
	theta[d] <- atan(x[d+1]/x[d])
	theta[which(is.nan(theta))] <- 0
	drop(theta)
}

funk <- function(theta,data) {
	n <- data[[1]]
	h <- data[[2]]
	y <- data[[3]]
	x <- data[[4]]
	G <- data[[5]]
	theta <- polar2cartes(theta)
	L <- Likelihood_point(n,h,y,theta%*%x,G)
	drop(-L)
}


maximization_BFGS <- function(n,h,y,x,p,G) {
	p <- cartes2polar(p)
	ftol <- 1e-9
	itmax <- 50
	pdelta <- 0.0005
	data <- list(n,h,y,x,G)

	result <- fun.dfpmin(p,funk,data,1,itmax,ftol,pdelta)
	theta_hat <- polar2cartes(result[2:length(result)])
	drop(c(result[1],theta_hat))
}


Likelihood_point <- function(n,h,Y,X,G,rand) {
	Kx <- array(0,c(n,n))
	Ky <- array(0,c(n,n))
	for (i in 1:(n-1)) {
		Ky[i,(i+1):n] <- ker((y[i]-y[(i+1):n])/h)
		Ky[(i+1):n,i] <- Ky[i,(i+1):n]	
		Kx[i,(i+1):n] <- ker((X[i]-X[(i+1):n])/h) 
		Kx[(i+1):n,i] <- Kx[i,(i+1):n]	
	}		
	f_hat <- colSums(Ky * Kx) / rowSums(G * Kx)
	w <- which(rowSums(G*Kx)>1e-3 & colSums(Ky*Kx)>1e-3)
	drop( sum(log(f_hat[w]))/length(w) ) -log(h)
}


############
# Conditional probability density approximation via Single-Index or PPCDE approximation
# The function returns the conditional density of y=y0 given d-vector x=x0
# y0 - a real number or a vector of real numbers at which to evaluate the conditional density
# x0 - a vector of dimension d
# y and x is the data used for estimation consisted of n observations (y,x) 
# y - a vector of length n, x - nXd matrix
# M - maximum number of projections >= 1. For single-Index model choose M=1

PPCDE <- function(y,x,y0,x0,M) {
	n <- length(y)
	dimen <- dim(x)[2]
	S <-  c(seq(9,3,by=-1.5))
	
	
	# normalizing the data to have variance=1
	mmx <- colSums(x)/n
	x <- (x - t(replicate(n,mmx)))
	x0 <- x0 - mmx
	mmy <- mean(y)
	ssx <-  eigen(t(x) %*% x/n )
	ssx <-  ssx$vectors %*% diag(ssx$values^(-0.5)) %*% t(ssx$vectors)
	ssy <- sd(y)
	x <- x %*% ssx
	x0 <- c(x0 %*% ssx)
	y <- (y - mmy) / ssy
	y0 <- (y0 - mmy) / ssy
	x <- t(x)

	G <- array(1/n,c(n,n)) # initial zero's model over all observations is the empirical distribution over all observations
	cpdf <- rep(1,length(y0)) # initial zero's model for y0 given x0 (any non-zero)

	H <- 3 * n^(-1/6)
	Ky <- array(0,c(n,n))
	for (i in 1:n) {
		Ky[i,i:n] <- ker((y[i]-y[i:n])/H)
		Ky[i:n,i] <- Ky[i,i:n]
	}

	for (proj in 1:M) {  
		###########  1nd stage of estimation - estimating the orienation vector theta_hat
		theta_bar <-  solve(ssx) %*%c(1,rep(0,dimen-1))
		for (const in  S) {
			h <- const * n^(-1/4) 

			hat <-  maximization_BFGS(n,h,y,x,theta_bar,G) 
			theta_hat <- c(hat[2:length(hat)])
			theta_hat <- theta_hat/(sum(c(theta_hat)^2)^0.5)
			if ( abs(theta_hat %*% theta_bar) > 0.99) {
				break
			}
			theta_bar <- theta_hat
		}

		cat("\n \n Proective direction ", proj, " = ", c(ssx %*% theta_hat) / sum(c(ssx %*% theta_hat)^2)^0.5, "\n\n")


		########### 2nd stage of estimation - estimating the conditional density
		H <- 3 * n^(-1/6)
		Kx <- array(0,c(n,n))
		for (i in 1:n) {
			Kx[i,i:n] <- ker(c(theta_hat %*% (x[,i]-x[,i:n]))/H)
			Kx[i:n,i] <- Kx[i,i:n]
		}


		G <- G * pmax(Ky%*%Kx,1e-3)/pmax(G%*%Kx,1e-3) / H 	## updating the model over all observations

		H <- 3 * n^(-1/6)
		Ky0 <- array(0,c(n,length(y0)))
		for (i in 1:length(y0)) {
			Ky0[,i] <- ker((y-y0[i])/H)
		}
		#Ky0 <- ker((y-y0)/H)	
		Kx0 <- ker(theta_hat %*% (x-x0)/H)
		## updating the model for y0 given x0
		cpdf <- cpdf * colSums(replicate(length(y0),Kx0) * Ky0) / pmax(sum(cpdf*replicate(length(y0),Kx0)),1e-5) / H 	
	}
	cpdf <- cpdf / ssy
	drop(cpdf)

}