問題:
1. 台灣留學生前往各國人數的平均數、中位數與最大值
2. 留學生前往歐洲的情況:德國與法國之間的選擇
3. 英國與美國是否為互相對立的主要抉擇
4. 18 年來(1998–2015)學生選擇留學各國的消長變化
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the yearly study-abroad head counts and derive the globals used by the
# rest of the script.
# `stats` is attached by default; library() is used instead of require() so a
# loading failure is an error rather than a silently ignored FALSE.
library(stats)

# One row per year: a `year` column followed by one count column per
# destination country.
rawdata <- read.csv("studyabroad.csv", header = TRUE)

# Fail early on an empty or malformed file instead of letting NA values
# propagate into the derived quantities below.
stopifnot("year" %in% names(rawdata), nrow(rawdata) > 0)

# Index of the last row, and the first/last year as they appear in the file.
lastyearId <- nrow(rawdata)
firstYear <- rawdata$year[1]
lastYear <- rawdata$year[lastyearId]

# Number of calendar years spanned by the data. This equals the row count
# only when the file is sorted by year and has no missing years.
n <- lastYear - firstYear + 1

# All column names, including `year`.
allType <- names(rawdata)

# Reset row names to a plain 1..nrow sequence (seq_len is safe for 0 rows).
rownames(rawdata) <- seq_len(nrow(rawdata))
# Descriptive statistics: per-column min, quartiles, median, mean and max
# (answers question 1 for every destination country).
summary(rawdata)
## year American U.K Australia
## Min. :1998 Min. :10324 Min. :3367 Min. :2065
## 1st Qu.:2002 1st Qu.:14212 1st Qu.:3843 1st Qu.:2377
## Median :2006 Median :14897 Median :6363 Median :2751
## Mean :2006 Mean :14910 Mean :6230 Mean :3094
## 3rd Qu.:2011 3rd Qu.:15582 3rd Qu.:8321 3rd Qu.:3186
## Max. :2015 Max. :19402 Max. :9653 Max. :6651
## Japan Canada France Germany
## Min. :1337 Min. : 826 Min. : 342.0 Min. : 295.0
## 1st Qu.:1708 1st Qu.:1782 1st Qu.: 566.5 1st Qu.: 400.5
## Median :2266 Median :2154 Median : 706.5 Median : 512.0
## Mean :2444 Mean :2122 Mean : 730.5 Mean : 560.5
## 3rd Qu.:3061 3rd Qu.:2414 3rd Qu.: 921.8 3rd Qu.: 643.5
## Max. :4703 Max. :3984 Max. :1100.0 Max. :1252.0
## NewZealand
## Min. :250.0
## 1st Qu.:475.8
## Median :539.0
## Mean :531.9
## 3rd Qu.:612.5
## Max. :743.0
# Students going to Europe: Pearson correlation test between the yearly
# counts for France and Germany.
cor.test(rawdata$France,rawdata$Germany)
##
## Pearson's product-moment correlation
##
## data: rawdata$France and rawdata$Germany
## t = 7.4472, df = 16, p-value = 1.386e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7034034 0.9550338
## sample estimates:
## cor
## 0.880966
# Choice between the U.S. and the U.K.: Pearson correlation test between the
# yearly counts in the `American` and `U.K` columns.
cor.test(rawdata$American,rawdata$U.K)
##
## Pearson's product-moment correlation
##
## data: rawdata$American and rawdata$U.K
## t = -0.25267, df = 16, p-value = 0.8037
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5147620 0.4160734
## sample estimates:
## cor
## -0.06304287
# Overall trend 1998-2015: reshape the wide table (one column per country)
# into long format and plot the log head count per country over time.

# Column positions of the destination-country counts (column 1 is `year`).
typeId <- 2:9
countryNames <- names(rawdata)[typeId]

# Build the long table in one vectorised step. Each column is created with its
# own type, so `year` and `people` stay numeric. (Binding them with the
# country names through cbind() would coerce everything to character and make
# plotly treat both axes as categorical.)
# unlist() walks the selected columns one after another, hence
# `times = ` for the years and `each = ` for the country labels.
newTable <- data.frame(
  year = rep(rawdata$year, times = length(typeId)),
  # NOTE: `people` holds the natural log of the head count, not the raw count.
  people = log(as.numeric(unlist(rawdata[, typeId], use.names = FALSE))),
  country = rep(countryNames, each = nrow(rawdata)),
  stringsAsFactors = FALSE
)

# Group rows by country; order() is stable, so the file's year order is kept
# within each country.
newTable <- newTable[order(newTable$country), ]
rownames(newTable) <- seq_len(nrow(newTable))

# One line per country. The axis range is a layout property, so it is set via
# layout() rather than passed to add_lines(), where `yaxis` is a trace
# attribute naming an axis.
p <- plot_ly(
  data = newTable, x = ~year,
  y = ~people, color = ~country
) %>%
  add_lines() %>%
  layout(yaxis = list(title = "log(people)", range = c(0, 10)))
p
## Warning in arrange_impl(.data, dots): '.Random.seed' is not an integer
## vector but of type 'NULL', so ignored