# efg, Stowers Institute, 25 April 2006 #source("0-LoadData.R") log2dNoControls <- log2d[, !ControlSet] dNoControls <- d[, !ControlSet] pdf("MicroarrayScaled.pdf", width=8, height=8) par(oma=c(1,0,1,0)) hist(log2dNoControls, breaks=200, ylim=c(0,90000), xlab="Log2[Expression]", main="Raw Data") mtext(paste(nrow(log2dNoControls), " microarrays, ", ncol(log2dNoControls), " probes", sep=""), line=0.5) text(0.1, 80000, paste( sprintf("%.1f", 100 * sum(log2dNoControls == 0) / prod(dim(log2dNoControls))), "% 0s", sep=""), adj=0, col="blue") PlotFooter("5-PatientScaled.R 1a") # Method 1 -- Scale by using median of original raw data # Row Medians - Exclude Zeros row.medians.ExcludeZeros <- numeric(nrow(dNoControls)) for (j in 1:nrow(dNoControls)) { PatientExpression <- dNoControls[j,] PatientExpression <- PatientExpression[PatientExpression != 0] row.medians.ExcludeZeros[j] <- median(PatientExpression) } PatientScaled <- sweep(log2dNoControls, BY.ROW<-1, log2(row.medians.ExcludeZeros), FUN="-") hist(PatientScaled, breaks=200, ylim=c(0,90000), xlab="Microarray Normalized", main="Expression / RawMedian") mtext(paste(nrow(PatientScaled), " microarrays, ", ncol(PatientScaled), " probes", sep=""), line=0.5) PlotFooter("5-PatientScaled.R 1a") PatientScaled[PatientScaled <= -5.5] <- -5.5 hist(PatientScaled, breaks=200, ylim=c(0,90000), xlab="Microarray Normalized", main="Expression / RawMedian with Floor -5.5") mtext(paste(nrow(PatientScaled), " microarrays, ", ncol(PatientScaled), " probes", sep=""), line=0.5) text(-5, 90000, paste( sprintf("%.1f", 100 * sum(PatientScaled <= -5.5) / prod(dim(PatientScaled))), "% <= -5.5", sep=""), adj=0, col="blue") PlotFooter("5-PatientScaled.R 1a") # Method 2 -- Scale by median log2(Expression) # Patient Scaling by Median (Each Patient log2 median assigned to 1.0 after ignoring zero values) # Row Medians - Exclude Zeros row.medians.ExcludeZeros <- numeric(nrow(log2dNoControls)) for (j in 1:nrow(log2dNoControls)) { PatientExpression <- log2dNoControls[j,] PatientExpression <- PatientExpression[PatientExpression != 0] row.medians.ExcludeZeros[j] <- median(PatientExpression) } PatientScaled <- sweep(log2dNoControls, BY.ROW<-1, row.medians.ExcludeZeros, FUN="/") hist(PatientScaled, breaks=200, ylim=c(0,90000), xlab="Microarray Scaled", main="log2(Expression) / Median[ log2(Expression) ]") mtext(paste(nrow(PatientScaled), " microarrays, ", ncol(PatientScaled), " probes", sep=""), line=0.5) text(0.05, 90000, paste( sprintf("%.1f", 100 * sum(PatientScaled == 0) / prod(dim(PatientScaled))), "% 0s", sep=""), adj=0, col="blue") PlotFooter("5-PatientScaled.R 1b") # Method 3 Treat much like Affy Data scaled to # target intensity of 150. (Could use 500, which is the Affy default). # # Scale original data to have median of 150, then take log2 transform # Row Medians - Exclude Zeros row.medians.ExcludeZeros <- numeric(nrow(dNoControls)) for (j in 1:nrow(dNoControls)) { PatientExpression <- dNoControls[j,] # Normally would want "fuzzy" comparison against 0.0 with floating point # but here require a "true" zero value. PatientExpression <- PatientExpression[PatientExpression != 0] row.medians.ExcludeZeros[j] <- median(PatientExpression) } PatientScaled <- sweep(dNoControls, BY.ROW<-1, 150.0/row.medians.ExcludeZeros, FUN="*") PatientScaled[PatientScaled != 0] = log2(PatientScaled[PatientScaled != 0]) hist(PatientScaled, breaks=200, ylim=c(0,90000), xlab="Microarray Scaled", main="Raw Median Scaled to 150") mtext(paste(nrow(PatientScaled), " microarrays, ", ncol(PatientScaled), " probes", sep=""), line=0.5) text(0.05, 90000, paste( sprintf("%.1f", 100 * sum(PatientScaled == 0) / prod(dim(PatientScaled))), "% 0s", sep=""), adj=0, col="blue") PatientScaled[PatientScaled < 0] <- 0 hist(PatientScaled, breaks=200, ylim=c(0,90000), xlab="Microarray Scaled", main="Raw Median Scaled to 150, Floor at 0") mtext(paste(nrow(PatientScaled), " microarrays, ", ncol(PatientScaled), " probes", sep=""), line=0.5) text(0.05, 90000, paste( sprintf("%.1f", 100 * sum(PatientScaled == 0) / prod(dim(PatientScaled))), "% 0s", sep=""), adj=0, col="blue") PlotFooter("5-PatientScaled.R 1c") dev.off() write.csv(PatientScaled, file="PatientScaledExpression.csv") stopifnot( all(rownames(Info) == rownames(PatientScaled)) ) ProbesAndInfo <- merge(Info, PatientScaled, by.x="row.names", by.y="row.names") write.csv(ProbesAndInfo, file="ProbesAndInfo.csv") library(fBasics) stat.mean <- apply(PatientScaled, BY.COLUMN<-2, mean) stat.sd <- apply(PatientScaled, BY.COLUMN<-2, sd) stat.skew <- apply(PatientScaled, BY.COLUMN<-2, skewness) # from fBasics stat.kurt <- apply(PatientScaled, BY.COLUMN<-2, kurtosis) # from fBasics par(mfrow=c(2,2)) DataName <- "Scaled Patient Expression" hist(stat.mean, main=paste(DataName, "Means"), breaks=100) hist(stat.sd, main=paste(DataName, "StDev"), breaks=100) hist(stat.skew, main=paste(DataName, "Skewness"), breaks=100) hist(stat.kurt, main=paste(DataName, "Kurtosis"), breaks=100) PlotFooter("5-PatientScaled.R 2") dev.off() png("PatientScaledStats%d.png", width=1000, height=1000) plot(stat.sd, stat.mean, main="Mean Vs SD") plot(stat.sd, stat.skew, main="Skewness Vs SD") plot(stat.sd, stat.kurt, main="Kurtosis Vs SD") plot(stat.mean, stat.skew, main="Skewness Vs Mean") plot(stat.mean, stat.kurt, main="Kurtosis Vs Mean") plot(stat.skew, stat.kurt, main="Kurtosis Vs Skewness") PlotFooter("5-PatientScaled.R 3") dev.off()