Showing posts with label categorical data. Show all posts
Showing posts with label categorical data. Show all posts

Thursday, April 25, 2013

RG#90: fluctuation diagram: graphical representation of a contingency table

# Simulated data: one numeric variable (x1) and two categorical ones (x2, x3).
set.seed(1234)
n_obs <- 1000
myd <- data.frame(
  x1 = rnorm(n_obs, mean = 15, sd = 5),
  x2 = sample(c("A", "B", "C"), n_obs, replace = TRUE),
  # 2 is listed twice in the pool, so it is drawn about twice as often as 1
  x3 = sample(c(1, 2, 2), n_obs, replace = TRUE)
)


# fluctuation plot
library(ggplot2)

# ggfluctuation() is not available in current ggplot2 releases, so the
# diagram is rebuilt here on top of geom_tile().
#
# tab:  a two-way contingency table, e.g. the result of table(a, b).
# type: "size" draws one grey square per cell whose area is proportional to
#       the cell count; "colour" draws equally sized tiles filled by count.
#
# Returns a ggplot object. Columns of `tab` are placed on the x axis and rows
# on the y axis; axis titles are taken from names(dimnames(tab)) when present.
fluctuation_plot <- function(tab, type = c("size", "colour")) {
  type <- match.arg(type)
  stopifnot(length(dim(tab)) == 2L)

  cells <- as.data.frame(as.table(tab), responseName = "freq")
  names(cells)[1:2] <- c("row_level", "col_level")
  axis_names <- names(dimnames(tab))
  axis_titles <- labs(x = axis_names[2], y = axis_names[1])

  if (type == "colour") {
    return(
      ggplot(cells, aes(x = col_level, y = row_level, fill = freq)) +
        geom_tile(colour = "grey50") +
        scale_fill_gradient(low = "white", high = "darkgreen") +
        axis_titles
    )
  }

  # Side length is the square root of the relative count so that the *area*
  # of each square is proportional to the count.
  peak <- max(cells$freq)
  cells$side <- if (peak > 0) sqrt(cells$freq / peak) else 0

  ggplot(
    cells,
    aes(x = col_level, y = row_level, width = side, height = side)
  ) +
    geom_tile(fill = "grey50", colour = "white") +
    # a fixed 1:1 coordinate ratio keeps the tiles square
    coord_fixed() +
    axis_titles
}

tab_x2_x3 <- table(myd$x2, myd$x3)

fluctuation_plot(tab_x2_x3) + theme_bw()

fluctuation_plot(tab_x2_x3, type = "colour") + theme_bw()

# cut() bins the numeric x1 into equal-width intervals before tabulating
fluctuation_plot(table(cut(myd$x1, 4), myd$x3), type = "colour") + theme_bw()



fluctuation_plot(table(cut(myd$x1, 5), myd$x3)) + theme_bw()





Saturday, April 13, 2013

RG#56: heatmap plot of categorical variables



# data: 20 individuals (rows), each with 20 categorical variables whose
# values are drawn from the letters A-G
set.seed(1234) # fixed seed, as in the other examples, for a reproducible plot
ids <- paste("ID", 1:20)
datf <- data.frame(
  # levels are reversed relative to the ID order; presumably so that "ID 1"
  # ends up at the top of the y axis
  indv = factor(ids, levels = rev(ids)),
  matrix(sample(LETTERS[1:7], 400, replace = TRUE), ncol = 20)
)



library(ggplot2)
library(reshape2)
# converting data to long form for ggplot2 use:
# one row per (indv, variable) pair, with the letter in `value`

datf1 <- melt(datf, id.vars = "indv")

# one tile per individual/variable combination, filled by category
ggplot(datf1, aes(variable, indv)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_manual(values = rainbow(7))



Monday, April 8, 2013

RG#42: Association plot (categorical data)


# data: 200 simulated observations of three categorical variables
set.seed(1234)
myd <- data.frame(
  fact1 = sample(c("A", "B", "C", "D"), 200, replace = TRUE),
  fact2 = sample(c("HL", "PS", "DS"), 200, replace = TRUE),
  fact3 = sample(c("Male", "Female"), 200, replace = TRUE)
)


# plot
# vcd package is for visualization of categorical data
# library() stops immediately if vcd is not installed; require() would only
# warn and let the script fail later at assoc()
library(vcd)
# three-way contingency table of fact1 x fact2 x fact3
mytable <- table(myd)
assoc(mytable, shade = TRUE, legend = TRUE)



RG#41: Mosaic plot: visualization of categorical data

# data: 200 simulated observations of three categorical variables
set.seed(1234)
myd <- data.frame(
  fact1 = sample(c("A", "B", "C", "D"), 200, replace = TRUE),
  fact2 = sample(c("HL", "PS", "DS"), 200, replace = TRUE),
  fact3 = sample(c("Male", "Female"), 200, replace = TRUE)
)


# plot
# vcd package is for visualization of categorical data
# library() stops immediately if vcd is not installed; require() would only
# warn and let the script fail later at mosaic()
library(vcd)
# three-way contingency table of fact1 x fact2 x fact3
mytable <- table(myd)
mosaic(mytable, shade = TRUE, legend = TRUE)



RG#40: Spine plot


# data: two crossed grouping variables (4 x 5 = 20 rows) and two numeric ones
set.seed(123)
myd <- data.frame(
  grp1 = rep(c("A", "B", "C", "D"), each = 5),
  grp2 = rep(c("G1", "G2", "G3", "G4", "G5"), 4),
  yvr1 = rnorm(20, 10, 3),
  yvr2 = rnorm(20, 50, 12),
  # spineplot() documents its dependent variable as a factor; since R 4.0.0
  # data.frame() keeps strings as character unless asked to convert them
  stringsAsFactors = TRUE
)


library(grDevices)


# plotting
# (factor by factor)
spineplot(
  grp1 ~ grp2,
  data = myd,
  col = c("gray50", "blue", "green4", "tan", "purple")
)




## (dependence on a numerical variable)
spineplot(grp1 ~ yvr1, data = myd)
# NOTE(review): grp1 has four levels but only three colours are supplied
# here - confirm that this is intended
spineplot(
  grp1 ~ yvr1,
  data = myd,
  breaks = 3,
  col = c("green1", "green3", "darkgreen")
)
# bin yvr1 at its quartiles instead of at equal-width breaks
spineplot(grp1 ~ yvr1, data = myd, breaks = quantile(myd$yvr1))



RG#39: plot factors (factor by factor plot)

# data: two crossed grouping variables (4 x 5 = 20 rows) and two numeric ones
set.seed(123)
myd <- data.frame(
  grp1 = rep(c("A", "B", "C", "D"), each = 5),
  grp2 = rep(c("G1", "G2", "G3", "G4", "G5"), 4),
  yvr1 = rnorm(20, 10, 3),
  yvr2 = rnorm(20, 50, 12),
  # the plots below are factor-by-factor plots; since R 4.0.0 data.frame()
  # keeps strings as character unless asked to convert them
  stringsAsFactors = TRUE
)


library(grDevices)



# plotting
plot(
  grp1 ~ grp2,
  data = myd,
  col = c("gray50", "blue", "green4", "tan", "purple")
)




# Colour the plot with a sequential ColorBrewer palette:
# five colours running from yellow through orange to red.
# cut() turns the continuous variable yvr1 into a five-level factor, so
# there is one palette colour per interval.

library(RColorBrewer)
ncolor <- 5
plotclr <- brewer.pal(n = ncolor, name = "YlOrRd") # build the palette
plot(
  cut(yvr1, 5) ~ grp1,
  data = myd,
  col = plotclr
)