Showing posts with label large data points. Show all posts
Showing posts with label large data points. Show all posts

Sunday, April 28, 2013

RG#93: Add contour or heat map plot to XY scatter plot

# data: n simulated points, Y linear in X plus Gaussian noise
set.seed(1234)
n <- 10000
X <- rnorm(n, mean = 10, sd = 4)
Y <- X * 1.5 + rnorm(n, mean = 0, sd = 8)


## colour brewing
## g colours from the ColorBrewer "RdYlBu" palette, in reversed order
library(RColorBrewer)
g <- 11
my.cols <- rev(brewer.pal(n = g, name = "RdYlBu"))

# compute 2D kernel density

# kernel density estimate on a 50-point grid per axis, using MASS
library(MASS)
z <- kde2d(x = X, y = Y, n = 50)

# raw points in grey, with g density contours coloured by my.cols on top
plot(
  X, Y,
  xlab = "X", ylab = "Y",
  pch = 19, cex = 0.3, col = "gray60"
)
contour(
  z,
  drawlabels = FALSE, nlevels = g,
  col = my.cols, add = TRUE, lwd = 2
)
# reference lines through the sample means of X and Y
abline(h = mean(Y), v = mean(X), lwd = 2, col = "black")
# correlation coefficient (2 decimals) as a borderless annotation
legend(
  "topleft",
  paste("r=", round(cor(X, Y), 2)),
  bty = "n"
)

## estimate the z counts
## Turn target probabilities into density levels for contour().
prob <- c(.99, .95, .90, .8, .5, .1, 0.05)

# grid spacing along each axis, taken from the first two grid points of z
dx <- diff(z$x[1:2])
dy <- diff(z$y[1:2])

# all grid density values in ascending order
sz <- sort(z$z)

# running sum of density * cell area, accumulated from the lowest density up
c1 <- cumsum(sz) * dx * dy

# For each probability p, interpolate the density value at which the running
# sum reaches 1 - p. vapply() pins the result to one number per probability
# (sapply() would silently change its return type on unexpected input).
levels <- vapply(
  prob,
  function(p) {
    approx(c1, sz, xout = 1 - p)$y
  },
  numeric(1)
)

plot(X, Y, col = "gray80", pch = 19, cex = 0.3)
# add = TRUE spelled out: `T` is an ordinary variable that can be reassigned
contour(z, levels = round(levels, 7), add = TRUE, col = "red")




# smooth scatter
# library() stops with an error if KernSmooth is not installed; require()
# would only return FALSE and let the script carry on.
library(KernSmooth)
# nrpoints: number of individual points still drawn on top of the smoothed
# density (here 30% of n), styled by pch / cex / col
smoothScatter(
  X, Y,
  nrpoints = 0.3 * n,
  colramp = colorRampPalette(my.cols),
  pch = 19, cex = 0.3, col = "green1"
)





 

Tuesday, April 9, 2013

RG#49 : Sunflower plot


# data: 1500 uniform draws per axis, rounded to whole numbers
set.seed(1233)
x <- round(runif(1500, min = 0, max = 40), digits = 0)
y <- round(runif(1500, min = 0, max = 100), digits = 0)

# ordinary scatter plot of the rounded data
plot(x, y, pch = 21)
# sunflower plot of the same data
sunflowerplot(
  x, y,
  pch = 19, col = "gray80",
  cex = 1, cex.fact = 1,
  size = 0.08, seg.lwd = 0.8
)


Monday, April 8, 2013

RG#44: Hexbin XY scatter plot and transparent point XY plot


# data
# ggplot2 must be attached before the first ggplot() call below
library(ggplot2)

set.seed(1234)
x <- rnorm(10000, mean = 50, sd = 30)
y <- x * 0.6 + rnorm(10000, mean = 0, sd = 30)
df <- data.frame(x, y)

# hexagonal binning with the default fill scale
ggplot(df, aes(x = x, y = y)) +
  stat_binhex() +
  theme_bw()




# vivid colored
# white hexagon outlines, fill gradient from green1 to red
ggplot(df, aes(x = x, y = y)) +
  stat_binhex(colour = "white", na.rm = TRUE) +
  scale_fill_gradientn(
    colours = c("green1", "red"),
    name = "Frequency",
    na.value = NA
  ) +
  theme_bw()




# plot with transparency

# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and the failure would surface later at ggplot().
library(ggplot2)
# red points at alpha 0.3
ggplot(df, aes(x = x, y = y)) +
  geom_point(alpha = 0.3, col = "red") +
  theme_bw()


# in base

# data
set.seed(1234)
x <- rnorm(10000, mean = 50, sd = 30)
y <- x * 0.6 + rnorm(10000, mean = 0, sd = 30)
df <- data.frame(x, y)

# green points with alpha 0.1, built directly with rgb()
plot(
  df$x, df$y,
  pch = 19, cex = 1,
  col = rgb(0, 1, 0, alpha = 0.1)
)


# alpha function to introduce transparency
require(RColorBrewer)

# Add an alpha channel to one or more colours.
#
# col:   vector of colours; each element is converted with col2rgb().
# alpha: alpha value handed to rgb() together with red/green/blue values
#        rescaled to 0-1; defaults to 1.
#
# Returns the colour strings produced by rgb(), one per element of col
# (named by col when col is an unnamed character vector). A zero-length
# col returns character(0).
# Stops with "vector of colours missing" when col is not supplied.
add.alpha <- function(col, alpha = 1) {
  if (missing(col)) {
    stop("vector of colours missing")
  }
  # Nothing to convert: return early, because the column-wise apply() below
  # cannot work on an input with no colours.
  if (length(col) == 0L) {
    return(character(0))
  }
  # 3 x length(col) matrix of red/green/blue, rescaled from 0-255 to 0-1
  channels <- vapply(
    col,
    function(one) as.vector(col2rgb(one)),
    integer(3)
  ) / 255
  # one rgb() call per colour (matrix column)
  apply(channels, 2, function(ch) {
    rgb(ch[1], ch[2], ch[3], alpha = alpha)
  })
}
# POINT SIZE AND TRANSPARENCY
# smaller points (cex 0.5) in red with alpha 0.2 from add.alpha()
plot(
  df$x, df$y,
  pch = 19, cex = 0.5,
  col = add.alpha("red", 0.2)
)


RG#26: Plot of large number of data points (using IDPmisc)

# data
# Fixed seed so the simulated data, and therefore the figures, are
# reproducible from run to run.
set.seed(1234)
x <- rnorm(100000, mean = 50, sd = 20)

# y depends linearly on x plus noise; y1 is drawn independently of x
y <- x * 0.6 + rnorm(100000, mean = 0, sd = 8)
y1 <- runif(100000, min = 0, max = 100)

# plot
# library() stops with an error if IDPmisc is not installed; require() would
# only return FALSE and the failure would surface later at iplot().
library(IDPmisc)
iplot(x, y)
# same data with pixs = 2 (presumably a larger pixel size; confirm in the
# IDPmisc documentation)
iplot(x, y, pixs = 2)

# x against the independent uniform variable
iplot(x = x, y = y1)