# Get Gene Ontology information for CAMDA06 microarray dataset by probe # using the Bioconductor biomaRt package. After running script the GOinfo # variable contains the Gene Ontology data, which is also written to the # file GOinfo.csv. # efg, Stowers Institute, 26 Jan 2006 # UNIX: change "BasePath" based on Windows/UNIX platform switch(.Platform$OS.type, windows = BasePath <- "U:", unix = BasePath <- "/n/projects", stop("unsupported OS platform") ) filename <- paste(BasePath, "/camda/2006/ecom2.mwgdna.com/gene_id_human_40k_a.txt", sep="") human40k <- read.delim(filename, comment.char="", as.is=TRUE) dim(human40k) GOinfo <- NULL library(biomaRt) mart <- martConnect() probe.empty <- substr(human40k$accession.reference,1, 5) == "empty" sum(probe.empty) # 192 #human40k$accession.reference[probe.empty] probe.control <- substr(human40k$accession.reference,1,13) == "mwgaracontrol" sum(probe.control) # 208 #human40k$accession.reference[probe.control] probe.human <- ( substr(human40k$accession.reference,1,8) == "mwghuman" ) | ( substr(human40k$accession.reference,1,13) == "mwghumcontrol") sum(probe.human) # 60 #human40k$accession.reference[probe.human] probe.good <- !(probe.empty | probe.control | probe.human) sum(probe.good) # 19700 probe.list <- human40k$accession.reference[probe.good] # Write GO information to disk file, one line at a time outGONums <- file("GONums.txt", "w") for (i in 1:length(probe.list)) { probe <- probe.list[i] if (substr(probe, nchar(probe)-1, nchar(probe)-1) == "_") { probe <- substr(probe,1, nchar(probe)-2) } else { if (substr(probe, nchar(probe)-2, nchar(probe)-2) == "_") { probe <- substr(probe,1, nchar(probe)-3) } else { probe <- paste("ERROR", probe) } } cat(i, probe, "\n") # show progress flush.console() # Assume embl ID x <- getGO(id=probe,type="embl",species="hsapiens",mart=mart) if ( (length(x@table$GOID) == 1) & is.na(x@table$GOID[1]) ) { # IF embl ID fails, try as refseq x <- getGO(id=probe,type="refseq",species="hsapiens",mart=mart) } cat(x@id[1], x@table$GOID, "\n", file=outGONums) d <- data.frame(x@table) d <- cbind(x@id, d) names(d)[1] <- "id" GOinfo <- rbind(GOinfo, d) } close(outGONums) martDisconnect(mart) write.csv(GOinfo, file="GOinfo.csv")