# Flag every row of state.x whose income lies above the median income.
# NOTE(review): state.x is not defined in this snippet — presumably a copy of
# datasets::state.x77 with lower-cased column names; confirm.
hiinc <- state.x[, "income"] > median(state.x[, "income"])
# Cross-tabulate region against the high-income flag. The original referenced
# an undefined `hinnc`, a typo for `hiinc`.
stateinc <- table(state.region, hiinc)
# f_stateinc has three columns: state.region, hiinc and Freq (the cell count).
f_stateinc <- as.data.frame(stateinc)
按各列取值去重,算频数:
# Count how often each distinct value of x occurs and return the counts as a
# data frame; the count column carries the default name "Freq".
as.data.frame(table(x), responseName = "Freq")
添加行合计/列合计:
# Cross-tabulate education level against parity in the built-in infert data.
# Column names are spelled out in full: the original `partity` was a typo that
# evaluates to NULL and makes table() fail, and `edu` only worked through
# partial matching of `education`.
tt <- table(infert$education, infert$parity)
# Append a total row (margin 1) and a total column (margin 2).
addmargins(tt, c(1, 2))
频率统计:用 prop.table,例如 prop.table(tt, 1) 按行计算比例,prop.table(tt, 2) 按列计算比例。
按列计算:
# Summarise a numeric vector: count of non-missing values, mean and standard
# deviation. Missing values are dropped from all three statistics so that the
# mean and sd describe the same observations that `n` counts (previously a
# single NA turned both mean and sd into NA).
#
# x: numeric vector, may contain NA.
# Returns a named numeric vector with elements `n`, `mean` and `sd`.
sumfun <- function(x) {
  c(
    n = sum(!is.na(x)),
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE)
  )
}
# Run sumfun over every column (MARGIN = 2) of somedata and keep the result.
x <- apply(X = somedata, MARGIN = 2, FUN = sumfun)
矩阵每一列除以该列的最大值:
# Largest value found in each column of somedata.
maxes <- apply(X = somedata, MARGIN = 2, FUN = max)
# Divide every column by its own maximum.
final <- sweep(x = somedata, MARGIN = 2, STATS = maxes, FUN = "/")
按time和diet分组,求weight的均值:
# Mean weight for every time/diet combination, computed with aggregate().
cweights <- aggregate(
  x = data$weight,
  by = data[c("time", "diet")],
  FUN = mean
)
# The same grouped mean computed with tapply(); this assignment replaces the
# aggregate() result stored just above.
cweights <- tapply(
  X = data$weight,
  INDEX = data[c("time", "diet")],
  FUN = mean
)
或
# Melt somedata with weight as the measured variable, then average it for
# every diet/time pair.
# NOTE(review): melt/cast are presumably from the reshape package — confirm it
# is loaded before this runs.
mclick <- melt(somedata, measure.var = "weight")
cast(mclick, formula = diet + time ~ variable, fun.aggregate = mean)
reshape包(使用前需先 library(reshape);cast 的输入应是经 melt 转换后的长格式数据):
以region为行,求各变量的均值
# One row per region, one column per variable, each cell holding the mean.
cast(somedata, formula = region ~ variable, fun.aggregate = mean)
以每个变量为一行
# Transposed layout: one row per variable, one column per region, cells are
# means.
cast(somedata, formula = variable ~ region, fun.aggregate = mean)
指定变量子集
# Mean per region, restricted to the population and life variables.
cast(
  somedata,
  formula = region ~ variable,
  fun.aggregate = mean,
  subset = variable %in% c("population", "life")
)
求各列变量均值、中位数、标准差
# Compute mean, median and sd for each variable; the formula `variable ~ .`
# puts one variable on each row with no further column split.
# NOTE(review): `cast` is presumably reshape::cast and `somedata` presumably
# already in melted form — confirm. The vector of functions is left inline as
# written, since the package may inspect the unevaluated expression — verify
# before refactoring.
cast(somedata, variable~., c(mean,median,sd))
按region汇总各列变量均值、中位数、标准差
# Compute mean, median and sd of every variable, broken down by region (one
# row per region).
# NOTE(review): `cast` is presumably reshape::cast and `somedata` presumably
# already in melted form — confirm. The vector of functions is left inline as
# written, since the package may inspect the unevaluated expression — verify
# before refactoring.
cast(somedata, region~variable, c(mean,median,sd))
没有评论:
发表评论