################################################################
# R functions used to implement the MB, MF/MB, MF2 and MF/MF2 procedures
# from paper "Model-free model-fitting and predictive distributions"
# by D.N. Politis (March 2010--revised April 2012). 
############################################################

# To begin, choose the type of uniformize from the three functions
# given at the end of this document, e.g. 

uniformize <- uniformize.kernel
# or:  uniformize <- uniformize.linear2 

#####################################################################
MFpp<- function(x,Y,xf,ban, box=FALSE, L2=TRUE, delete=FALSE, edgecorrect=TRUE) {
# returns MF2 point predictor (L1 or L2) of Yf at xf (as in Table 4.1) 
# using box or normal kernel (default) for cdf estimation
# edgecorrect=TRUE implies omitting the u's close to the edges
# as in Remark 4.8 of the paper
YYY<- MFppDISTR(x,Y,xf,ban, box, delete, edgecorrect)
Yf<- mean(YYY, na.rm=TRUE)
if (L2==FALSE) {Yf<- median(YYY, na.rm=TRUE)}
Yf  }

############################################################
MFppDISTR<- function(x,Y,xf,ban, box=FALSE,delete=FALSE, edgecorrect=TRUE) { 
# returns MF2 naive predictive distr. of Yf at xf (to take mean or median of)
#  (x,Y) are data from regression; ban=bandwidth to use at xf
# xf is a SINGLE number; box=TRUE means rectangular kernel (the default is Normal)
# delete=TRUE only works for normal kernel here.
n<- length(x)
u<- c(1:n)

if (box==TRUE){ # rectangular (Box) kernel in uniformize
for (i in 1:n){ # this computes the u_t of eq. (35) 
yw<-  Y[ abs(x-x[i])< ban] # w=constant, Rectangular Window
u[i]<-  uniformize(yw, Y[i],  inverse=FALSE) }
# qqplot(u, c(1:length(u))/length(u), xlab="quantiles of transformed data u_t", 
# ylab="quantiles of Uniform (0,1)"   )
u<- na.rm(u) 
Yf<-uniformize(Y[ abs(x-xf)< ban], u, inverse=TRUE)  }

if (box==FALSE){ # if not rectangular, then use normal kernel 
## with IQ range =(-ban/4 ,ban/4)*stdev as Box, i.e., stdev=ban/2.7=0.37*ban
#for (i in 1:n){ # this computes the u_t of eq. (35) 
# w<-  dnorm((x-x[i]), mean = 0, sd = (0.37*ban))
# u[i]<-  uniformize(Y, Y[i],  inverse=FALSE, w=w)            }
## qqplot(u, c(1:length(u))/length(u), xlab="quantiles of transformed data u_t", 
## ylab="quantiles of Uniform (0,1)"   )
u<- getU(x,Y,ban,delete, edgecorrect)
 w<-  dnorm((x-xf), mean = 0, sd = (0.37*ban))
 Yf<-uniformize(Y, u, inverse=TRUE, w=w)  }

Yf  }
######################################################### 
crossvalMF<- function(x,y, nstep=20,bmax=0, box=FALSE, L1=TRUE){
	# finds the best bandwidth for MPpp by cross-validation
	# searches for bandwidths from zero to bmax 
	# uses criterion L2 or L1 (default)
	# nstep is number of grid points for bandwidth search
	# box=FALSE means using Normal kernel for smoothing 
# IN VIEW OF THE CLOSENESS OF MFpp TO A KERNEL SMOOTHER THIS GIVES
# ALMOST IDENTICAL ANSWERS AS REGULAR crossval WHICH IS MUCH FASTER 

if (bmax==0){bmax<- 1.5*sqrt(var(x))}

bstep<-  bmax/nstep
n<- length(x)
SSE<- c(1:nstep)*0 

if(L1==FALSE){
	for (i in 1:nstep){
		for (k in 2:(n-1)){
	xx<- c(x[1:(k-1)], x[(k+1):n])
        yy<- c(y[1:(k-1)], y[(k+1):n]) # xx,yy are delete-k data
Yf<-  MFpp(xx,yy,x[k],ban=(i*bstep),box=FALSE,L2=TRUE)
     SSE[i]<- SSE[i]+(y[k]-Yf )^2
		}}}
		
		if(L1==TRUE){
	for (i in 1:nstep){
		for (k in 2:(n-1)){
	xx<- c(x[1:(k-1)], x[(k+1):n])
        yy<- c(y[1:(k-1)], y[(k+1):n]) # xx,yy are delete-k data
Yf<-  MFpp(xx,yy,x[k],ban=(i*bstep),box=FALSE,L2=FALSE) 
     SSE[i]<- SSE[i]+abs(y[k]-Yf ) 
		}} }

 k<- c(1:nstep)*bstep
k<- k[SSE==min(SSE,na.rm = TRUE)]
na.rm(k)}
###########################################################
na.rm<-function(x)
{	x[!is.na(x)]
}

############################################################ 	
crossval<- function(x,y, nstep=20,bmax=0,  L1=TRUE){
	# finds the best bandwidth for ksmooth by cross-validation
	# searches for bandwidts from zero to bmax 
	# uses criterion L1 or L2 (default)
	# nstep is number of grid points for bandwidth search
	# stud=TRUE means studentize by kernel smoothed estimate of variance 
        # (with same bandwidth)

 if (bmax==0){bmax<- 1.5*sqrt(var(x))}

bstep<-  bmax/nstep
n<- length(x)
SSE<- c(1:nstep)*0 

if(L1==FALSE){
	for (i in 1:nstep){
		for (k in 2:(n-1)){
			xx<- c(x[1:(k-1)], x[(k+1):n])
          yy<- c(y[1:(k-1)], y[(k+1):n])
kss<- ksmooth(xx,yy,"normal",ban=(i*bstep),x.points=x[k]) 
     # rr<-  kresid(xx,yy, (i*bstep))
     SSE[i]<- SSE[i]+(y[k]-kss$y )^2
		}}}
		
		if(L1==TRUE){
	for (i in 1:nstep){
		for (k in 2:(n-1)){
			xx<- c(x[1:(k-1)], x[(k+1):n])
         yy<- c(y[1:(k-1)], y[(k+1):n])
kss<- ksmooth(xx,yy,"normal",ban=(i*bstep),x.points=x[k]) 
   #  rr<-  kresid(xx,yy, (i*bstep))
     SSE[i]<- SSE[i]+ abs(y[k]-kss$y ) 
		}}}

 k<- c(1:nstep)*bstep
k<- k[SSE==min(SSE,na.rm = TRUE)]
na.rm(k)}

##########################################################
pred.intMF3<- function(x,y,xfut, model=0, B=999){
# gives key quantiles of the predictive distribution of yfut 
# to be observed at the value xfut using L2 predictors
# based on ROOT method of MF3 paper Sections 3 and 4
# options: model =0 means MB;  model=1 means MF/MB; 
# model =2 means MF2; and model=3 means MF/MF2 (MF3!).
# xfut can be multivariate if model =0 or 1 only.
# Cases MF2 and MF/MF2 are rather slow due to double loop
n<- length(x)

pp<- length(xfut) 
if (pp>1 & model==2) warning("need univariate xfut for MF2 option")
pseudoresp<- matrix(0,B,pp)  
pred.df<- matrix(0,B,pp) # each column is an empirical pred.d.f. for Yfut[j] at xfut[j]
	
b<-   crossval(x,y, L1=T)    # L1 cross validation
# b<-  crossvalMF(x,y, L1=T) # is an alternative but indistinguishable from above  
   ks <- ksmooth(x,y,"normal",ban= b,x.points = x) # Get a regression estimate for y vs. x.
   ks2 <- ksmooth(x,y^2,"normal",ban= b,x.points = x)  
   mm<- ks$y
   ss<- sqrt(ks2$y-mm^2)

ks<- ksmooth(x,y,"normal",ban= b,x.points = xfut)
mmfut<- ks$y
ks2 <- ksmooth(x,y^2,"normal",ban= b,x.points = xfut)  
ssfut<- sqrt(ks2$y-mmfut^2)
   ee<- kresid(x,y,b,stud=TRUE) 
   ee<- ee-mean(ee)
 if (model==1){ee<- pred.resid(x,y,b,stud=TRUE)} # used to be called: te

# Bootstrap part:
if (model<2){  # i.e., MB or MF/MB 
	for (i in 1:B) { eboot<- sample(ee,size=n, replace=TRUE )
  yboot<- mm+ss*eboot            # Ystar data 
pseudoresp[i,]<-  mmfut + ssfut* sample(ee, size=pp, replace=TRUE) # Yfut star  

  ks <- ksmooth(x,yboot,"normal",ban= b,x.points = xfut)  
   ks2 <- ksmooth(x,yboot^2,"normal",ban= b,x.points = xfut)  
 sboot<- sqrt(ks2$y-ks$y^2)
  pred.df[i,]<- pseudoresp[i,]- ks$y + mmfut   
 }}
 
if (model >=2){  # i.e., MF2 or MF3

   if (model==2){ delete<-  FALSE }
   if (model==3){ delete<-  TRUE}

   u<- getU(x,y,b,delete, edgecorrect=TRUE)

   PI<- MFpp(x,y, xfut, ban=b, box=FALSE, L2=TRUE, delete, edgecorrect=TRUE)
   Ystar<- u  # here we need xfut to be UNIVARIATE
   wf<-  dnorm((x-xfut), mean = 0, sd = (0.37*b))

for (j in 1:B) { uboot<- sample(u,size=n, replace=TRUE)
   for (i in 1:n){ # this computes the Ystar data 
   w<-  dnorm((x-x[i]), mean = 0, sd = (0.37*b))
   Ystar[i]<-  uniformize(y, uboot[i],  inverse=TRUE, w=w) }
   Yf<- uniformize(y, sample(u,1), inverse=TRUE, w=wf) 
   bootroot<- Yf-MFpp(x,Ystar, xfut, ban=b, box=FALSE, L2=TRUE, delete, edgecorrect=TRUE)
   pred.df[j,]<- bootroot  + PI  
}  }

# pred.df  #this matrix has columns=empirical pred.distributions
# quantile(pred.df, probs=c(alpha/2, 1-alpha/2))
qq<- matrix(0,12,pp)
for(j in 1:pp){
qq[,j]<- quantile(pred.df[,j], probs=c(2.5, 5, 10,40,45,47.5, 52.5, 55,60, 90,95,97.5)/100, na.rm=TRUE)
# output is 12 quantiles of the predictive distribution of Yfut
# e.g. 90% pred.int. for j-th column is (qq[2,j],qq[11,j])
}  
qq} #END

###################################
pred.intNORM<- function(x,y,xfut, alpha=0.05){
# construct a (1-alpha)100% predictive interval for yfut (to be observed at xfut)
# based on NORMAL approximation as in  MF2 paper 
bw<- crossval(x,y, L1=T)   
ks <- ksmooth(x, y, "normal", ban = bw,x.points=xfut)
mfut<- ks$y  
vfut<- mfut*0
pp <- length(xfut)
qq<- matrix(0,12,pp)
for(j in 1:pp){
qq[,j]<- qnorm(c(2.5, 5, 10,40,45,47.5, 52.5, 55,60, 90,95,97.5)/100)
# these are the 12 normal quantiles
vfut[j]<- sqrt( ksmooth.variance(x,y,bw, xfut[j], pred=T))
  qq[,j]<- mfut[j]+qq[,j]*vfut[j]
}

qq}

############################################
conf.intNORM<- function(x,y,xfut, alpha=0.05){ #JUNE 2012
# construct a (1-alpha)100% conf interval for E yfut (to be observed at xfut)
# based on NORMAL approximation as in  MF2 paper 
bw<- crossval(x,y, L1=T)   
ks <- ksmooth(x, y, "normal", ban = bw,x.points=xfut)
mfut<- ks$y # this is the point predictor/estimate of regression
vfut<- mfut*0
pp <- length(xfut)
qq<- matrix(0,12,pp)
for(j in 1:pp){
qq[,j]<- qnorm(c(2.5, 5, 10,40,45,47.5, 52.5, 55,60, 90,95,97.5)/100)
# these are the 12 normal quantiles
vfut[j]<- sqrt( ksmooth.variance(x,y,bw, xfut[j], pred=F)) # pred=F for conf
  qq[,j]<- mfut[j]+qq[,j]*vfut[j]
}

qq}  # DONE June 2012
############################
conf.intMF3multi<- function(x,y,xfut, model=0, B=999){
# JUNE 2012 but based on pred.intMF3multi of Nov 2011
# gives key quantiles of the condifence distribution for m(xfut)
# to be observed at the value xfut using L2 predictors
# based on ROOT method of MF3 paper Sections 3 and 4
# options: model =0 means MB;  model=1 means MF/MB;
# model =2 means uniformize by Lin.Interp. (POLYGON)
# model=3 means uniformize by kernel smoothed
# xfut can be multivariate for ALL models (even 2 and 3)
# Cases MF2 and MF/MF2 are rather slow due to double loop
# (triple loop if xfut is multivariate)

n<- length(x)

pp<- length(xfut)
# if (pp>1 & model==2) warning("need univariate xfut for MF2 option")
pseudoresp<- matrix(0,B,pp)
pred.df<- matrix(0,B,pp) # each column is an empirical pred.d.f. for Yfut[j] at xfut[j]

b<-   crossval(x,y, L1=T)    # L1 cross validation
# b<-  crossvalMF(x,y, L1=T) # is an alternative but indistinguishable from above
   ks <- ksmooth(x,y,"normal",ban= b,x.points = x) # Get a regression estimate for y vs. x.
   ks2 <- ksmooth(x,y^2,"normal",ban= b,x.points = x)
   mm<- ks$y
   ss<- sqrt(ks2$y-mm^2)

ks<- ksmooth(x,y,"normal",ban= b,x.points = xfut)
mmfut<- ks$y  # this is the point predictor/regression estimate at xfut
ks2 <- ksmooth(x,y^2,"normal",ban= b,x.points = xfut)
ssfut<- sqrt(ks2$y-mmfut^2)
   ee<- kresid(x,y,b,stud=TRUE)
   ee<- ee-mean(ee)
 if (model==1){ee<- pred.resid(x,y,b,stud=TRUE)} # used to be called: te

# Bootstrap part:
if (model<2){  # i.e., MB or MF/MB
        for (i in 1:B) { eboot<- sample(ee,size=n, replace=TRUE )
  yboot<- mm+ss*eboot            # Ystar data
pseudoresp[i,]<-  mmfut
# + ssfut* sample(ee, size=pp, replace=TRUE) # Yfut star
# the above is for confidence intervals so don't add random error
  ks2 <- ksmooth(x,yboot^2,"normal",ban= b,x.points = xfut)
  sboot<- sqrt(ks2$y-ks$y^2)
  pred.df[i,]<- pseudoresp[i,]- ks$y + mmfut
 }}

if (model >=2){  # i.e., MF2 or MF3
delete <- FALSE
   if (model==2){ uniformizeCC <-  uniformize
                  MFppCC<- MFpp
                } #  MFppDISTRCC <- MFppDISTR }
   if (model==3){ uniformizeCC <-  uniformize.kernel
                  MFppCC<- MFpp.kernel
               } #   MFppDISTRCC <- MFppDISTR.kernel }

    u<- getU(x,y,b,delete, edgecorrect=TRUE)
# above line not really EEDED
for (kk in 1:pp){

   PI<- MFppCC(x,y, xfut[kk], ban=b, box=FALSE, L2=TRUE, delete, edgecorrect=TRUE)
   Ystar<- u*0  # here we need xfut to be UNIVARIATE
   wf<-  dnorm((x-xfut[kk]), mean = 0, sd = (0.37*b))

for (j in 1:B) { #uboot<- sample(u,size=n, replace=TRUE)
uboot<- runif(n)
   for (i in 1:n){ # this computes the Ystar data
   w<-  dnorm((x-x[i]), mean = 0, sd = (0.37*b))
   Ystar[i]<-  uniformizeCC(y, uboot[i],  inverse=TRUE, w=w) }
   # NO: Yf<- uniformizeCC(y, sample(u,1), inverse=TRUE, w=wf)
   bootroot<- PI -MFpp(x,Ystar, xfut[kk], ban=b, box=FALSE, L2=TRUE, delete, edgecorrect=TRUE)
   pred.df[j,kk]<- bootroot  + PI
}  }  }

# pred.df  #this matrix has columns=empirical pred.distributions
# quantile(pred.df, probs=c(alpha/2, 1-alpha/2))
qq<- matrix(0,12,pp)
for(j in 1:pp){
qq[,j]<- quantile(pred.df[,j], probs=c(2.5, 5, 10,40,45,47.5, 52.5, 55,60, 90,95,97.5)/100, na.rm=
TRUE)
# output is 12 quantiles of the confidence distribution of Yfut
# e.g. 90% confidence .int. for j-th column is (qq[2,j],qq[11,j])
} 
qq} #END
############################################
############################################
## \int K^2 =0.761 for normal kernel
# \int K^2 =0.4857 for triangle
# and  \int K^2 =1 for box (in ksmooth)
################################## 
ksmooth.variance<- function(xx, yy = xx, b, xfut, pred = F){ # NEW FOR DEC 2012
# get estimate of variance of kernel smoothed regression at the point: xfut
#  xfut can be a vector 
n<- length(xx)
m<- length(xfut)
y.ones<- c(1:n)/ c(1:n)
dens<- xfut*0
for (k in 1: m){ dens[k] <-  sum(y.ones[ abs( xx-xfut[k]) < b/2])/(n*b)  }
# to give density of x-points at vector xfut  NEW!
        ks <- ksmooth(xx, yy, "normal", ban = b, x.points=xx)
        r <- yy - ks$y  # residuals
        VV <- (var(r) * 0.761)/(n*b*dens)
        if(pred == T) {    VV <- var(r) * (1 + 0.761/(n*b*dens))         }
       #pred=T gives the variance for a predictive interval at vector: xfut
        VV    }
# CORRECTED: DECEMBER 2012 

#######################################################
pred.resid<- function(x, y, b, stud = F, edgecorrect=TRUE){
# gets delete-1 predictive residuals for use in fast cross validation
# stud=T implies diving by local (predictive) standard deviation as in MF2 paper
mat<- cbind(x,y)
matt<- mat[order(mat[,1]), 1:2]
x<- matt[,1]
y<- matt[,2] # re-order data so that x is sorted 
# because ksmooth needs it
n <- length(x)
if(stud == F) {
rr <- c(1:n) * 0
for(k in 2:(n - 1)) {
xx <- c(x[1:(k - 1)], x[(k + 1):n])
yy <- c(y[1:(k - 1)], y[(k + 1):n])
kss <- ksmooth(xx, yy, "normal", ban = b, x.points = x[k])
rr[k] <- y[k] - kss$y}
xx <- x[2:n]
yy <- y[2:n]
kss <- ksmooth(xx, yy, "normal", ban = b, x.points = x[1])
rr[1] <- y[1] - kss$y
xx <- x[1:(n - 1)]
yy <- y[1:(n - 1)]
kss <- ksmooth(xx, yy, "normal", ban = b, x.points = x[n])
rr[n] <- y[n] - kss$y}
if(stud == T) {
rr <- c(1:n) * 0
for(k in 2:(n - 1)) {
xx <- c(x[1:(k - 1)], x[(k + 1):n])
yy <- c(y[1:(k - 1)], y[(k + 1):n])
kss <- ksmooth(xx, yy, "normal", ban = b, x.points = x[k])
kss2 <- ksmooth(xx, yy^2, "normal", ban = b, x.points = x[k])

vv<- max(c(0,kss2$y - kss$y^2), na.rm=TRUE) 
ss<- vv*0
if ( vv> 0 ) { ss<- sqrt(vv)} 
while (ss<= 0) {ss<- ss+0.001*sqrt(var(yy)) }

rr[k] <- (y[k] - kss$y)/ss }
xx <- x[2:n]
yy <- y[2:n]
kss <- ksmooth(xx, yy, "normal", ban = b, x.points = x[1])
kss2 <- ksmooth(xx, yy^2, "normal", ban = b, x.points = x[1])

vv<- max(c(0,kss2$y - kss$y^2), na.rm=TRUE) 
ss<- vv*0
if ( vv> 0 ) { ss<- sqrt(vv)} 
while (ss<= 0) {ss<- ss+0.001*sqrt(var(yy)) }

rr[1] <- (y[1] - kss$y)/ss
xx <- x[1:(n - 1)]
yy <- y[1:(n - 1)]
kss <- ksmooth(xx, yy, "normal", ban = b, x.points = x[n])
kss2 <- ksmooth(xx, yy^2, "normal", ban = b, x.points = x[n])

vv<- max(c(0,kss2$y - kss$y^2), na.rm=TRUE) 
ss<- vv*0
if ( vv> 0 ) { ss<- sqrt(vv)} 
while (ss<= 0) {ss<- ss+0.001*sqrt(var(yy)) }
 
rr[n] <- (y[n] - kss$y)/ss }
if (edgecorrect==TRUE){
xmin<- min(x, na.rm=T)
xmax<- max(x, na.rm=T)
rr[ abs(x-xmin) < b/2 | abs(x-xmax) < b/2 ]<- NA*rr[ abs(x-xmin) < b/2 | abs(x-xmax) < b/2 ]
}

na.rm(rr)}
#######################################################################
kresid<-function(x,y,b,stud = FALSE, edgecorrect=TRUE){
# calculates residuals from ksmooth with bandwidth b
# stud=TRUE implies diving by local (predictive) 
# standard deviation as in MF2 paper
mat<- cbind(x,y)
matt<- mat[order(mat[,1]), 1:2]
x<- matt[,1]
y<- matt[,2] # re-order data so that x is sorted 
# because ksmooth needs it 
n <- length(x)
ks <- ksmooth(x,y,"normal",ban=b,x.points=x  ) 
r<- y-ks$y
if (stud==TRUE){
ks2 <- ksmooth(x,y^2,"normal",ban=b,x.points=x ) 
ss<- y*0
vv<- y*0
aa<- na.rm(ks2$y - ks$y^2)
nn<- length(aa)
vv[1:nn]<- aa[1:nn]

if ( min(vv)>= 0 ) { ss<- sqrt(vv)} 
while (min(ss)<= 0) {ss<- ss+0.001*sqrt(var(y)) }
 
r<- r/ss}
if (edgecorrect==TRUE){
xmin<- min(x, na.rm=T)
xmax<- max(x, na.rm=T)
r[ abs(x-xmin) < b/2 | abs(x-xmax) < b/2 ]<- NA*r[ abs(x-xmin) < b/2 | abs(x-xmax) < b/2 ]
}

na.rm(r)}
############################################################## 
getU<- function(x,y,ban=0, delete=FALSE, edgecorrect=TRUE){ 
# get the transformed data in MF2 using normal kernel
# delete=F gives the regular u's of eq. (35) 
# delete=T gives the predictive u's from the delete-1 data--eq. (39)
# if edgecorrect=TRUE, leave out the u's that are too close to the edges
n<- length(x) 
if (ban==0) {ban<- crossval(x,y)}
u<- x 

if (delete==FALSE){
for (i in 1:n){ # this computes the u_t of eq. (35) 
 w<-  dnorm((x-x[i]), mean = 0, sd = (0.37*ban))
 u[i]<-  uniformize(y, y[i],  inverse=FALSE, w=w) } }

if (delete==TRUE){

for(k in 2:(n - 1)) {
xx <- c(x[1:(k - 1)], x[(k + 1):n])
yy <- c(y[1:(k - 1)], y[(k + 1):n])
 w<-  dnorm((xx-x[k]), mean = 0, sd = (0.37*ban))
 u[k]<-  uniformize(yy, y[k],  inverse=FALSE, w=w) } 

xx <- x[2:n]
yy <- y[2:n]
 w<-  dnorm((xx-x[1]), mean = 0, sd = (0.37*ban))
 u[1]<-  uniformize(yy, y[1],  inverse=FALSE, w=w)   

xx <- x[1:(n - 1)]
yy <- y[1:(n - 1)]
w<-  dnorm((xx-x[n]), mean = 0, sd = (0.37*ban))
 u[n]<-  uniformize(yy, y[n],  inverse=FALSE, w=w)   
} # end if delete==TRUE

if (edgecorrect==TRUE){
xmin<- min(x, na.rm=T)
xmax<- max(x, na.rm=T)
u[ abs(x-xmin) < ban/2 | abs(x-xmax) < ban/2 ]<- NA*u[ abs(x-xmin) < ban/2 | abs(x-xmax) < ban/2 ]
}

na.rm(u)}
#######################################################################
########### END OF MAIN FUNCTIONS #####################################
#######################################################################
####### BELOW ARE THREE OPTIONS FOR THE uniformize FUNCTION ###########
#######################################################################
uniformize.linear <- function(x, x0, inverse=FALSE, w) {
# MARCH 2010
# this function implements transformation (35) and its inverse
# using a piecewise linear estimate of local cdf
# IF inverse=FALSE, then x is really the Y data in regression
# IF inverse=TRUE, then x is actually the x variable
# x0 can be vector!
        n <- length(x)
        if (n <= 1) return(NA) # ("sample size too small!")
        if(missing(w)) {
                x <- sort(x)
                w <- (0:n)/n
        } else {
                if(any(w < 0)) warning("Negative weight encountered.")
                if(sum(w) != 1) {
                        w <- w/sum(w)
                        warning("Weights do not sum to 1.  Renormalizing.")
                }
                ord <- order(x)
                x <- x[ord]
                w <- cumsum(c(0,w[ord]))
        }

        midpoints <- (x[-n] + x[-1])/2
        midpoints <- c(2*x[1] - midpoints[1], midpoints, 2*x[n] - midpoints[n-1])

        if(inverse) {
                Finv <- approxfun(w, midpoints)
                return(Finv(x0))
        } else {
                Fmid <- approxfun(midpoints, w)
                F <- function(t) ifelse(t <= midpoints[1], 0,
                        ifelse(t >= midpoints[n+1], 1, Fmid(t)))
                return(F(x0))
        }
}

############################################################################
# BELOW IS A NEW VERSION OF THE uniformize R FUNCTION (Oct. 2011)
uniformize.linear2 <-  function(x, x0, inverse=FALSE, w) {
# October 2011
# this function implements transformation (35) and its inverse
# using a piecewise linear estimate of local cdf
# IF inverse=FALSE, then x is really the Y data in regression
# IF inverse=TRUE, then x is actually the x variable
# x0 can be vector!
	n <- length(x)
	if(missing(w)) {
		x <- sort(x)
		w <- (0:n)/n
	} else {
		if(any(w < 0)) warning("Negative weight encountered.")
		if(sum(w) != 1) { 
			w <- w/sum(w)
			warning("Weights do not sum to 1.  Renormalizing.")
		}
		ord <- order(x)
		x <- x[ord]
		w <- cumsum(c(0,w[ord]))
	}
	
	midpoints <- (x[-n] + x[-1])/2

	lwr <- min(2* midpoints[1] - midpoints[2], x[1] - (midpoints[1]-x[1]))
	upr <- max(2* midpoints[n-1] - midpoints[n-2], x[n] + (x[n] - midpoints[n-1]))
	midpoints <- c(lwr, midpoints, upr)
	
	if(inverse) {
		Finv <- approxfun(w, midpoints)
		return(Finv(x0))
	} else {
		Fmid <- approxfun(midpoints, w)
		F <- function(t) ifelse(t <= midpoints[1], 0, 
			ifelse(t >= midpoints[n+1], 1, Fmid(t)))
		return(F(x0))
	}
}

##################################################################

uniformizeSPlus <- function(x, x0, inverse=FALSE, w) {
#  Splus version of uniformize.linear 
	n <- length(x)
	
	if(missing(w)) {
		x <- sort(x)
		w <- (0:n)/n
	} else {
		if(any(w < 0)) warning("Negative weight encountered.")
		if(sum(w) != 1) { 
			w <- w/sum(w)
			warning("Weights do not sum to 1.  Renormalizing.")
		}
		ord <- order(x)
		x <- x[ord]
		w <- cumsum(c(0,w[ord]))
	}
	
	midpoints <- (x[-n] + x[-1])/2

	lwr <- min(2* midpoints[1] - midpoints[2], x[1] - (midpoints[1]-x[1]))
	upr <- max(2* midpoints[n-1] - midpoints[n-2], x[n] + (x[n] - midpoints[n-1]))

	midpoints <- c(lwr, midpoints, upr)
	
	if(inverse) {
		return(approx(w, midpoints, x0))
	} else {
		F <- function(t) {
			ret <- numeric(length(t))
			zeros <- (t <= midpoints[1])
			ones <- (t >= midpoints[n+1])
			mid <- ((t > midpoints[1]) & (t < midpoints[n+1]))
			
			ret[mid] <- approx(midpoints, w, t[mid])$y
			ret[ones] <- 1
			ret[zeros] <- 0
			return(ret)
		}
		
		return(F(x0))
	}
}

#################################################################

uniformize.kernel <- function(x, x0, inverse=FALSE, w) { 
# APRIL 2012 
# New R function that does uniformize on the basis
# of a kernel smoothed edf (need kernel > 0 everywhere
# to ensure the resulting edf is invertible)
n <- length(x) 
if (n <= 1) return(NA) # ("sample size too small!")
if(missing(w)) {
w <- c(1:n)*0+1/n } 
if(any(w < 0)) warning("Negative weight encountered.")
if(sum(w) != 1) { 
w <- w/sum(w)
warning("Weights do not sum to 1.  Renormalizing.")
}
if ( is.na(sum(x)) ) {
w<- w[!is.na(x)]
w <- w/sum(w)
x<- x[!is.na(x)]
warning("missing values in x") }
# first compute the smoothed edf, i.e., the inverse=FALSE case 
ppdf<- density(x, weights = w ) # default uses gaussian kernel
npdf<- length(ppdf$x) 
delta<- ppdf$x[2]-ppdf$x[1] 
pcdf<- cumsum(ppdf$y)*delta 
if(inverse) { 
FFinv <- approxfun(pcdf, ppdf$x) 
return(FFinv(x0)) 
#In the inverse case x0 denotes the vector of y values to which FFinv is applied 
} 
else { 
FF <- approxfun(ppdf$x, pcdf) 
return(FF(x0)) 
#Here x0 denotes the vector of x values to which FF is applied 
}}
#################################################################### 
####################### END ######################################## 
#################################################################### 


#### Examples ####
#The piecewise linear edf
x <- sort(rnorm(10))
x <- c(min(x), x)
w <- c(rep(1,10),2)
w <- w/sum(w)
#plot(ecdf(x))
plot(stepfun(x, cumsum(c(0,w))))
tmp <- seq(min(x)-.4,max(x)+.4, .01)
lines(tmp, uniformizeSPlus(x, tmp, w=w), col="red")

#The inverse of the piecewise linear edf
#Two lines are added to the plot produced above
#which demonstrate Finv(.75) is the point on the fitted
#function.
abline(v=uniformize(x, .75, inverse=TRUE, w=w), col="blue")
abline(h=.75, col="blue")