我在R中有两个数据框,我需要逐行计算元素匹配,最后获得一个列,其中包含两个表的笛卡尔积的长度和两行的ID.此外,表格非常大,行数不同,但列数相同.
我有以下代码,但多次运行时速度很慢.
library(data.table)
table_1<-data.table(matrix(c(1:24),nrow = 4))
table_2<-data.table(matrix(c(11:34),nrow = 4))
names(table_1)<-c("s1", "s2","s3","s4","s5","s6")
names(table_2)<-c("a1","a2","a3","a4","a5","a6")
table_1$ID<-seq.int(nrow(table_1))
table_2$ID_ap<-seq.int(nrow(table_2))
setcolorder(table_1, c("ID", "s1", "s2","s3","s4","s5","s6"))
setcolorder(table_2, c("ID_ap","a1","a2","a3","a4","a5","a6"))
CJ.table<-function(X,Y) setkey(X[,c(k=1,.SD)],k)[Y[,c(k=1,.SD)],allow.cartesian=TRUE][,k:=NULL]
join<-CJ.table(table_1,table_2)
R<-subset(join, select=c("ID_ap","ID"))
R$Ac<- (join$s1 == join$a1) + (join$s1 ==join$a2) + (join$s1 ==join$a3) + (join$s1 ==join$a4) + (join$s1 ==join$a5) + (join$s1 ==join$a6)+
(join$s2 == join$a1) + (join$s2 ==join$a2) + (join$s2 ==join$a3) + (join$s2 ==join$a4) + (join$s2 ==join$a5) + (join$s2 ==join$a6)+
(join$s3 == join$a1) + (join$s3 ==join$a2) + (join$s3 ==join$a3) + (join$s3 ==join$a4) + (join$s3 ==join$a5) + (join$s3 ==join$a6)+ …Run Code Online (Sandbox Code Playgroud)