将类"dist"的对象转换为r中的数据帧

Nad*_*ina 9 r distance dataframe

如果可能的话,将数据帧转换为类"dist"的对象,是否可以做相反的事情呢?将类"dist"转换为数据框?例如

<dist(hasil)

   1            2            3           4
2  0.088814413                                    
3  0.084929382  0.030413813                        
4  0.063245553  0.029120440 0.044418465            
5  0.061983869  0.027018512 0.036400549 0.009055385
Run Code Online (Sandbox Code Playgroud)

以及数据框中的结果

<

   col          row          distance
   1            2            0.088814413
   1            3            0.084929382          
   1            4            0.063245553
   1            5            0.061983869
   2            3            0.030413813
   2            4            0.029120440
   2            5            0.027018512
   3            4            0.044418465
   3            5            0.036400549
   4            5            0.009055385
Run Code Online (Sandbox Code Playgroud)

Jak*_*ead 16

library(maps)
data(us.cities)

d <- dist(head(us.cities[c("lat", "long")]))

##           1         2         3         4         5
## 2 20.160489                                        
## 3 23.139853 40.874243                              
## 4 15.584303  9.865374 38.579820                    
## 5 27.880674  7.882037 48.707100 15.189882          
## 6 26.331187 41.720457  6.900101 41.036931 49.328558

library(reshape2)

df <- melt(as.matrix(d), varnames = c("row", "col"))

df[df$row > df$col,]
##    row col     value
## 2    2   1 20.160489
## 3    3   1 23.139853
## 4    4   1 15.584303
## 5    5   1 27.880674
## 6    6   1 26.331187
## 9    3   2 40.874243
## 10   4   2  9.865374
## 11   5   2  7.882037
## 12   6   2 41.720457
## 16   4   3 38.579820
## 17   5   3 48.707100
## 18   6   3  6.900101
## 23   5   4 15.189882
## 24   6   4 41.036931
## 30   6   5 49.328558
Run Code Online (Sandbox Code Playgroud)


A5C*_*2T1 6

我实际上会写一个这样的函数:

myFun <- function(inDist) {
  if (class(inDist) != "dist") stop("wrong input type")
  A <- attr(inDist, "Size")
  B <- if (is.null(attr(inDist, "Labels"))) sequence(A) else attr(inDist, "Labels")
  if (isTRUE(attr(inDist, "Diag"))) attr(inDist, "Diag") <- FALSE
  if (isTRUE(attr(inDist, "Upper"))) attr(inDist, "Upper") <- FALSE
  data.frame(
    row = B[unlist(lapply(sequence(A)[-1], function(x) x:A))],
    col = rep(B[-length(B)], (length(B)-1):1),
    value = as.vector(inDist))
}
Run Code Online (Sandbox Code Playgroud)

现在,假设我们开始(注意非数字行和列名称):

dd <- as.dist((1 - cor(USJudgeRatings)[1:5, 1:5])/2)
#            CONT       INTG       DMNR       DILG
# INTG 0.56659545                                 
# DMNR 0.57684427 0.01769236                      
# DILG 0.49380400 0.06424445 0.08157452           
# CFMG 0.43154385 0.09295712 0.09332092 0.02060062
Run Code Online (Sandbox Code Playgroud)

我们可以通过简单的方式改变它:

myFun(dd)
#     row  col      value
# 1  INTG CONT 0.56659545
# 2  DMNR CONT 0.57684427
# 3  DILG CONT 0.49380400
# 4  CFMG CONT 0.43154385
# 5  DMNR INTG 0.01769236
# 6  DILG INTG 0.06424445
# 7  CFMG INTG 0.09295712
# 8  DILG DMNR 0.08157452
# 9  CFMG DMNR 0.09332092
# 10 CFMG DILG 0.02060062
Run Code Online (Sandbox Code Playgroud)

快速的性能比较:

set.seed(1)
x <- matrix(rnorm(1000*1000), nrow = 1000)
dd <- dist(x)

## Jake's function
fun2 <- function(inDist) {
  df <- melt(as.matrix(inDist), varnames = c("row", "col"))
  df[as.numeric(df$row) > as.numeric(df$col), ]
}

all(fun2(dd) == myFun(dd))
# [1] TRUE
system.time(fun2(dd))
#    user  system elapsed 
#   0.346   0.002   0.349 
system.time(myFun(dd))
#    user  system elapsed 
#   0.012   0.000   0.015
Run Code Online (Sandbox Code Playgroud)