# Match array probes with gene IDs from biomaRt
# efg, Stowers Institute, 3 Feb 2006
#
# Reads the probe list from arrayprobes.csv, queries biomaRt for the GO
# annotation of every probe (first as an EMBL ID, then as a RefSeq ID when the
# EMBL query returns a single NA GO ID), and writes two files to the working
# directory:
#   ArrayProbeGONums.txt - one line per probe: the id followed by its GO IDs
#   ArrayProbeGOInfo.csv - every returned GO table stacked, with an "id" column

# Choose "BasePath" based on Windows/UNIX platform
BasePath <- switch(.Platform$OS.type,
  windows = "U:",
  unix    = "/n/projects",
  stop("unsupported OS platform")
)

# file.path() always joins with "/", giving the same string as pasting
# BasePath onto "/efg/camda/2006/arrayprobes.csv".
filename <- file.path(BasePath, "efg", "camda", "2006", "arrayprobes.csv")
array.probes <- read.csv(filename, as.is = TRUE)

probe.list <- array.probes$probe
length(probe.list)
length(unique(probe.list))

GOinfo <- NULL

library(biomaRt)
mart <- martConnect()

# Write GO information to disk file, one line at a time
outGONums <- file("ArrayProbeGONums.txt", "w")

# One slot per probe; the per-probe tables are bound together once after the
# loop instead of growing GOinfo with rbind() on every iteration.
GOchunks <- vector("list", length(probe.list))

# The finally clause releases the file connection and the mart connection even
# when a lookup fails part-way through the probe list.
tryCatch({
  # seq_along() yields an empty sequence for an empty probe list, whereas
  # 1:length(probe.list) would iterate over c(1, 0).
  for (i in seq_along(probe.list)) {
    probe <- probe.list[i]
    cat(i, probe, "\n")  # show progress
    flush.console()

    # Assume embl ID
    x <- getGO(id = probe, type = "embl", species = "hsapiens", mart = mart)

    # Scalar && short-circuits, so a zero-length GOID no longer produces a
    # zero-length condition (which makes `if` fail).
    if (length(x@table$GOID) == 1 && is.na(x@table$GOID[1])) {
      # If embl ID fails, try as refseq
      x <- getGO(id = probe, type = "refseq", species = "hsapiens", mart = mart)
    }

    cat(x@id[1], x@table$GOID, "\n", file = outGONums)

    # Prefix the GO table with the queried id so rows stay traceable once all
    # tables are stacked.
    d <- data.frame(x@table)
    d <- cbind(x@id, d)
    names(d)[1] <- "id"
    GOchunks[[i]] <- d
  }
}, finally = {
  close(outGONums)
  martDisconnect(mart)
})

# do.call(rbind, list()) is NULL, matching the original initial value of
# GOinfo when there are no probes.
GOinfo <- do.call(rbind, GOchunks)

write.csv(GOinfo, row.names = FALSE, file = "ArrayProbeGOInfo.csv")