在wordcloud中更改特定的单词颜色

Tyl*_*ker 17 r word-cloud

我想用R构建一个文字云(我已经使用wordcloud完成了),然后将特定颜色的颜色设置为某种颜色.目前,该功能的行为是根据频率对颜色词进行着色(这可能很有用),但是单词大小已经这样做,所以我想用颜色来增加其他含义.

关于如何在wordcloud中为特定单词着色的任何想法?(如果在R中有另一个wordcloud函数,我不知道我更愿意去那条路.)

一个模拟的例子和我的尝试(我试图在同一庄园中处理颜色参数我会从绘图函数中定期绘制):

library(wordcloud)

x <- paste(rep("how do keep the two words as one chunk in the word cloud", 3), 
           collapse = " ")
X <- data.frame(table(strsplit(x, " ")))
COL <- ifelse(X$Var1 %in% c("word", "cloud", "words"), "red", "black")
wordcloud(X$Var1, X$Freq, color=COL)
Run Code Online (Sandbox Code Playgroud)

编辑:我想补充一点,wordcloud的新版​​本(2010年1月10日;版本2.0)[谢谢Ian Fellows和David Robinson]现在是这个功能以及其他一些非常棒的补充.以下是在wordcloud中完成原始目标的代码:

wordcloud(X$Var1, X$Freq, color=COL, ordered.colors=TRUE, random.color=FALSE)
Run Code Online (Sandbox Code Playgroud)

Dav*_*son 14

编辑:如评论中所述,下面描述的功能现已添加到wordcloud库中.


我的方法是采用R函数的代码并对其进行自定义.它只需要改变几行,现在可以采用单一颜色或相同长度的颜色矢量words.

library(wordcloud)

colored.wordcloud <- function(words,freq,scale=c(4,.5),min.freq=3,max.words=Inf,random.order=TRUE,random.color=FALSE,
        rot.per=.1,colors="black",ordered.colors=FALSE,use.r.layout=FALSE,...) { 
    tails <- "g|j|p|q|y"
    last <- 1
    nc<- length(colors)

    if (ordered.colors) {
        if (length(colors) != 1 && length(colors) != length(words)) {
            stop(paste("Length of colors does not match length of words",
                       "vector"))
        }
    }

    overlap <- function(x1, y1, sw1, sh1) {
        if(!use.r.layout)
            return(.overlap(x1,y1,sw1,sh1,boxes))
        s <- 0
        if (length(boxes) == 0) 
            return(FALSE)
        for (i in c(last,1:length(boxes))) {
            bnds <- boxes[[i]]
            x2 <- bnds[1]
            y2 <- bnds[2]
            sw2 <- bnds[3]
            sh2 <- bnds[4]
            if (x1 < x2) 
                overlap <- x1 + sw1 > x2-s
            else 
                overlap <- x2 + sw2 > x1-s

            if (y1 < y2) 
                overlap <- overlap && (y1 + sh1 > y2-s)
            else 
                overlap <- overlap && (y2 + sh2 > y1-s)
            if(overlap){
                last <<- i
                return(TRUE)
            }
        }
        FALSE
    }

    ord <- rank(-freq, ties.method = "random")
    words <- words[ord<=max.words]
    freq <- freq[ord<=max.words]
    if (ordered.colors) {
        colors <- colors[ord<=max.words]
    }

    if(random.order)
        ord <- sample.int(length(words))
    else
        ord <- order(freq,decreasing=TRUE)
    words <- words[ord]
    freq <- freq[ord]
    words <- words[freq>=min.freq]
    freq <- freq[freq>=min.freq]
    if (ordered.colors) {
        colors <- colors[ord][freq>=min.freq]
    }

    thetaStep <- .1
    rStep <- .05
    plot.new()
    op <- par("mar")
    par(mar=c(0,0,0,0))
    plot.window(c(0,1),c(0,1),asp=1)
    normedFreq <- freq/max(freq)
    size <- (scale[1]-scale[2])*normedFreq + scale[2]
    boxes <- list()



    for(i in 1:length(words)){
        rotWord <- runif(1)<rot.per
        r <-0
        theta <- runif(1,0,2*pi)
        x1<-.5
        y1<-.5
        wid <- strwidth(words[i],cex=size[i],...)
        ht <- strheight(words[i],cex=size[i],...)
        #mind your ps and qs
        if(grepl(tails,words[i]))
            ht <- ht + ht*.2
        if(rotWord){
            tmp <- ht
            ht <- wid
            wid <- tmp  
        }
        isOverlaped <- TRUE
        while(isOverlaped){
            if(!overlap(x1-.5*wid,y1-.5*ht,wid,ht) &&
                    x1-.5*wid>0 && y1-.5*ht>0 &&
                    x1+.5*wid<1 && y1+.5*ht<1){
        if (!random.color) {
                if (ordered.colors) {
                    cc <- colors[i]
                }
                else {
                    cc <- ceiling(nc*normedFreq[i])
                    cc <- colors[cc]
                }
        } else {
         cc <- colors[sample(1:nc,1)]
        }
                text(x1,y1,words[i],cex=size[i],offset=0,srt=rotWord*90,
                        col=cc,...)
                #rect(x1-.5*wid,y1-.5*ht,x1+.5*wid,y1+.5*ht)
                boxes[[length(boxes)+1]] <- c(x1-.5*wid,y1-.5*ht,wid,ht)
                isOverlaped <- FALSE
            }else{
                if(r>sqrt(.5)){
                    warning(paste(words[i],
                                    "could not be fit on page. It will not be plotted."))
                    isOverlaped <- FALSE
                }
                theta <- theta+thetaStep
                r <- r + rStep*thetaStep/(2*pi)
                x1 <- .5+r*cos(theta)
                y1 <- .5+r*sin(theta)
            }
        }
    }
    par(mar=op)
    invisible()
}
Run Code Online (Sandbox Code Playgroud)

尝试一些代码:

colors = c("blue", "red", "orange", "green")
colored.wordcloud(colors, c(10, 5, 3, 9), colors=colors)
Run Code Online (Sandbox Code Playgroud)

  • 当然.wordcloud分布在lgpl下.您的另一个选择是向我提交补丁,添加其他功能. (2认同)
  • 刚刚发布的新版[wordcloud](http://cran.r-project.org/web/packages/wordcloud/index.html)现在包含此功能(以及[其他一些很酷的功能](http:/ /blog.fellstat.com/?p=101)).距离@ TylerRinker最初的问题还有36个小时 - 不错,团队! (2认同)