df1=data.frame(c("male","female","male"),c("1","2","3","4","5","6"),seq(141,170))
names(df1) = c("gender","age","height")
df1$age <- factor(df1$age,
levels=c(1,2,3,4,5,6),
labels=c("16-24","25-34","35-44","45-54","55-64","65+"))
q1a=c(1,0,1,0,0,1);q1b=c(0,0,2,2,2,0);q1c=c(0,0,3,3,0,3) #1,2 and 3 used to be compatible with existing datasets. Could change all to 1 if necessary.
df2=data.frame(q1a=q1a,q1b=q1b,q1c=q1c); df1 <- cbind(df1,df2)
rm(q1a,q1b,q1c,df2)
Run Code Online (Sandbox Code Playgroud)
我希望复制R中SPSS的多个响应问题的分析.
目前我正在使用此代码:
#creating function for analysing questions with grouped data
multfreqtable <- function(a, b, c){
# number of respondents (for percent of cases)
totrep=sum(a==1|b==2|c==3)
#creating frequency table
table_a=data.frame("a",sum(a==1))
names(table_a)=c("question","freq")
table_b=data.frame("b",sum(b==2))
names(table_b)=c("question","freq")
table_c=data.frame("c",sum(c==3))
names(table_c)=c("question","freq")
table_question <-rbind(table_a,table_b,table_c)
#remove individual question tables
rm(table_a,table_b,table_c)
#adding total
total=as.data.frame("Total")
totalsum=(sum(table_question$freq,na.rm=TRUE))
totalrow=cbind(total,totalsum)
names(totalrow)=c("question","freq")
table_question=rbind(table_question,totalrow)
#adding percentage column to frequency table
percentcalc=as.numeric(table_question$freq)
percent=(percentcalc/totalsum)*100
table_question<-cbind(table_question,percent)
#adding percent of cases column to frequency table
poccalc=as.numeric(table_question$freq)
percentofcases=(poccalc/totrep)*100
table_question<-cbind(table_question,percentofcases)
#print percent of cases value
total_respondents <<- data.frame(totrep)
#remove all unnecessary data and values
rm(total,totalsum,totalrow,b,c,percent,percentcalc,percentofcases,totrep,poccalc)
return(table_question)
}
#calling function - must tie to data.frame using $ !!!
q1_frequency<-multfreqtable(df1$q1a,df1$q1b,df1$q1c)
#renaming percent of cases - This is very important while using current method
total_respondents_q1 <- total_respondents
rm(total_respondents)
Run Code Online (Sandbox Code Playgroud)
结果生成此表:

我正在寻找一种更有效的方法,如果有更多或更少的多项选择问题,理想情况下不需要编辑函数.
您的功能实际上太复杂了,无法满足您的需求.我认为像这样的函数应该工作并且更灵活.
multfreqtable = function(data, question.prefix) {
# Find the columns with the questions
a = grep(question.prefix, names(data))
# Find the total number of responses
b = sum(data[, a] != 0)
# Find the totals for each question
d = colSums(data[, a] != 0)
# Find the number of respondents
e = sum(rowSums(data[,a]) !=0)
# d + b as a vector. This is your overfall frequency
f = as.numeric(c(d, b))
data.frame(question = c(names(d), "Total"),
freq = f,
percent = (f/b)*100,
percentofcases = (f/e)*100 )
}
Run Code Online (Sandbox Code Playgroud)
在示例数据集中添加另一个问题:
set.seed(1); df1$q2a = sample(c(0, 1), 30, replace=T)
set.seed(2); df1$q2b = sample(c(0, 2), 30, replace=T)
set.seed(3); df1$q2c = sample(c(0, 3), 30, replace=T)
Run Code Online (Sandbox Code Playgroud)
为"q1"响应创建一个表:
> multfreqtable(df1, "q1")
question freq percent percentofcases
1 q1a 15 33.33333 60
2 q1b 15 33.33333 60
3 q1c 15 33.33333 60
4 Total 45 100.00000 180
Run Code Online (Sandbox Code Playgroud)
为"q2"回复制作一个表格:
> multfreqtable(df1, "q2")
question freq percent percentofcases
1 q2a 14 31.11111 53.84615
2 q2b 13 28.88889 50.00000
3 q2c 18 40.00000 69.23077
4 Total 45 100.00000 173.07692
Run Code Online (Sandbox Code Playgroud)
这是该函数的修改版本,允许您一次创建多个问题的表列表:
multfreqtable = function(data, question.prefix) {
z = length(question.prefix)
temp = vector("list", z)
for (i in 1:z) {
a = grep(question.prefix[i], names(data))
b = sum(data[, a] != 0)
d = colSums(data[, a] != 0)
e = sum(rowSums(data[,a]) !=0)
f = as.numeric(c(d, b))
temp[[i]] = data.frame(question = c(sub(question.prefix[i],
"", names(d)), "Total"),
freq = f,
percent = (f/b)*100,
percentofcases = (f/e)*100 )
names(temp)[i] = question.prefix[i]
}
temp
}
Run Code Online (Sandbox Code Playgroud)
例子:
> multfreqtable(df1, "q1")
$q1
question freq percent percentofcases
1 a 15 33.33333 60
2 b 15 33.33333 60
3 c 15 33.33333 60
4 Total 45 100.00000 180
> test1 = multfreqtable(df1, c("q1", "q2"))
> test1
$q1
question freq percent percentofcases
1 a 15 33.33333 60
2 b 15 33.33333 60
3 c 15 33.33333 60
4 Total 45 100.00000 180
$q2
question freq percent percentofcases
1 a 14 31.11111 53.84615
2 b 13 28.88889 50.00000
3 c 18 40.00000 69.23077
4 Total 45 100.00000 173.07692
> test1$q1
question freq percent percentofcases
1 a 15 33.33333 60
2 b 15 33.33333 60
3 c 15 33.33333 60
4 Total 45 100.00000 180
Run Code Online (Sandbox Code Playgroud)