2012年10月31日星期三

R语言数据操作:笔记 chap 8

按地区划分收入高于和低于收入中位数的州的数量:
# Flag each state whose income is above the median income of all states.
# state.x77 is the built-in matrix of state facts; its income column is named
# "Income" (the original note's state.x / 'income' do not exist in base R).
hiinc <- state.x77[, "Income"] > median(state.x77[, "Income"])

# Cross-tabulate region against the high-income flag. The flag must be
# referenced by the name it was assigned to above (hiinc).
stateinc <- table(state.region, hiinc)

# f_stateinc has three columns: state.region, hiinc, Freq (the count).

f_stateinc <- as.data.frame(stateinc)

按各列取值去重,算频数:
# Count how often each distinct value of x occurs and return the counts as a
# data frame; the count column is named Freq (the default, spelled out here).
as.data.frame(table(x), responseName = "Freq")

添加行合计/列合计:
# Cross-tabulate education against parity in the built-in infert data set.
# Column names are written in full: `$` partial matching is fragile, and a
# misspelled column name yields NULL, which makes table() fail.
tt <- table(infert$education, infert$parity)

# Append totals over dimension 1 and dimension 2 (including the grand total).
addmargins(tt, c(1, 2))

频率(比例)统计:prop.table,例如 prop.table(tt) 得到各单元格占总数的比例,prop.table(tt, 1) 按行、prop.table(tt, 2) 按列计算比例

按列计算:
# Summarise a vector as a named numeric vector: n (number of non-missing
# values), mean and sd. mean() and sd() are called without na.rm, so both
# are NA whenever x contains a missing value.
sumfun <- function(x) {
  non_missing <- sum(!is.na(x))
  c(n = non_missing, mean = mean(x), sd = sd(x))
}

# Run sumfun over every column of somedata (MARGIN = 2 selects columns).
x <- apply(somedata, MARGIN = 2, FUN = sumfun)

矩阵每一列除以该列的最大值:
# Largest value found in each column of somedata.
maxes <- apply(somedata, MARGIN = 2, FUN = max)

# Divide every column by its own maximum: sweep() applies "/" along the
# column margin, using the matching element of maxes for each column.
final <- sweep(somedata, MARGIN = 2, STATS = maxes, FUN = "/")

按time和diet分组,求weight的均值:
# Mean weight for each time/diet combination, returned by aggregate() as a
# data frame with one row per combination.
cweights <- aggregate(data$weight, by = data[c("time", "diet")], FUN = mean)

# Same grouped mean via tapply(), which returns an array indexed by time and
# diet instead. Note that this assignment overwrites the aggregate() result.
cweights <- tapply(data$weight, INDEX = data[c("time", "diet")], FUN = mean)



# Melt somedata into long format with weight as the only measured variable
# (melt() and cast() come from the reshape package). The argument name is
# written in full as measure.vars rather than relying on partial matching.
mclick <- melt(somedata, measure.vars = "weight")

# Cast back to one row per diet/time combination, averaging the melted values.
cast(mclick, diet + time ~ variable, fun.aggregate = mean)

reshape包

以region为行,求各变量的均值
# One row per region, one column per variable, each cell aggregated with mean.
cast(somedata, region ~ variable, fun.aggregate = mean)

以每个变量为一行
# One row per variable, one column per region, each cell aggregated with mean.
cast(somedata, variable ~ region, fun.aggregate = mean)

指定变量子集
# Mean per region and variable, restricted to the two listed variables.
cast(
  somedata,
  region ~ variable,
  fun.aggregate = mean,
  subset = variable %in% c("population", "life")
)

求各列变量均值、中位数、标准差
# One row per variable, aggregated with mean, median and sd.
cast(somedata, variable ~ ., fun.aggregate = c(mean, median, sd))

按region汇总各列变量均值、中位数、标准差
# Per region and variable, aggregated with mean, median and sd.
cast(somedata, region ~ variable, fun.aggregate = c(mean, median, sd))

没有评论:

发表评论