資料為1998-2015各年度我國學生赴主要留學國家留學簽證人數統計表

問題:

1.台灣留學生赴各國人數的平均數、中位數與最大值

2.留學生赴歐洲的情況:德國與法國之間的選擇

3.英國或美國是否為對立的主要抉擇

4.18年來學生選擇留學各國的消長變化

library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the yearly study-abroad visa counts and derive a few bookkeeping
# values (first/last year, number of yearly rows, column names).
# library() is used instead of require(): require() only returns FALSE when a
# package is missing, so the failure would surface later and less clearly.
library(stats)
rawdata <- read.csv("studyabroad.csv", header = TRUE)

# Index of the last row; the file holds one row per year.
lastyearId <- nrow(rawdata)
firstYear <- rawdata$year[1]
lastYear <- rawdata$year[lastyearId]

# Number of yearly observations. Taken from the row count rather than
# lastYear - firstYear + 1 so that it stays a valid row index bound even if
# a year is missing from the file or the rows are not sorted by year.
n <- nrow(rawdata)

# Column names: "year" followed by one column per destination country.
allType <- names(rawdata)

# Reset row names to 1..n; seq_len() is safe when the table is empty.
rownames(rawdata) <- seq_len(nrow(rawdata))

# Descriptive statistics: min, quartiles, median, mean and max for the year
# column and for each destination-country column.
summary(rawdata)
##       year         American          U.K         Australia   
##  Min.   :1998   Min.   :10324   Min.   :3367   Min.   :2065  
##  1st Qu.:2002   1st Qu.:14212   1st Qu.:3843   1st Qu.:2377  
##  Median :2006   Median :14897   Median :6363   Median :2751  
##  Mean   :2006   Mean   :14910   Mean   :6230   Mean   :3094  
##  3rd Qu.:2011   3rd Qu.:15582   3rd Qu.:8321   3rd Qu.:3186  
##  Max.   :2015   Max.   :19402   Max.   :9653   Max.   :6651  
##      Japan          Canada         France          Germany      
##  Min.   :1337   Min.   : 826   Min.   : 342.0   Min.   : 295.0  
##  1st Qu.:1708   1st Qu.:1782   1st Qu.: 566.5   1st Qu.: 400.5  
##  Median :2266   Median :2154   Median : 706.5   Median : 512.0  
##  Mean   :2444   Mean   :2122   Mean   : 730.5   Mean   : 560.5  
##  3rd Qu.:3061   3rd Qu.:2414   3rd Qu.: 921.8   3rd Qu.: 643.5  
##  Max.   :4703   Max.   :3984   Max.   :1100.0   Max.   :1252.0  
##    NewZealand   
##  Min.   :250.0  
##  1st Qu.:475.8  
##  Median :539.0  
##  Mean   :531.9  
##  3rd Qu.:612.5  
##  Max.   :743.0
# European destinations: Pearson correlation test between the yearly counts
# for France and Germany (do the two move together over the years?).
cor.test(rawdata$France,rawdata$Germany)
## 
##  Pearson's product-moment correlation
## 
## data:  rawdata$France and rawdata$Germany
## t = 7.4472, df = 16, p-value = 1.386e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7034034 0.9550338
## sample estimates:
##      cor 
## 0.880966
# US vs. UK: Pearson correlation test between the yearly counts for the two
# countries (a strong negative correlation would suggest they are rival choices).
cor.test(rawdata$American,rawdata$U.K)
## 
##  Pearson's product-moment correlation
## 
## data:  rawdata$American and rawdata$U.K
## t = -0.25267, df = 16, p-value = 0.8037
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5147620  0.4160734
## sample estimates:
##         cor 
## -0.06304287
# Overall trend 1998-2015: reshape the wide table (one column per country)
# into a long table (year, log head-count, country) and draw one line per
# country.
typeId <- 2:9  # columns holding the destination countries
countryNames <- names(rawdata)[typeId]

# Build the long table in one vectorised step instead of rbind()-ing inside a
# loop. The old cbind(year, log(people), type) mixed numbers with strings, so
# every column was coerced to character and plotly treated both axes as
# categorical. Here year and people stay numeric.
# t() + as.numeric() flattens row by row: all countries for the first year,
# then all countries for the second year, and so on.
countMatrix <- as.matrix(rawdata[, typeId, drop = FALSE])
newTable <- data.frame(
  year = rep(rawdata$year, each = length(typeId)),
  people = log(as.numeric(t(countMatrix))),
  country = rep(countryNames, times = nrow(rawdata)),
  stringsAsFactors = FALSE
)

# Group the rows by country; order() keeps ties in their original order, so
# years remain in file order within each country.
newTable <- newTable[order(newTable$country), ]
rownames(newTable) <- NULL

# Axis ranges are layout options, not trace options: the trace-level `yaxis`
# attribute only names which axis a trace is drawn against, so the range has
# to be set through layout().
p <- plot_ly(data = newTable, x = ~year,
             y = ~people, color = ~country) %>%
  add_lines() %>%
  layout(yaxis = list(title = "log(people)", range = c(0, 10)))
p
## Warning in arrange_impl(.data, dots): '.Random.seed' is not an integer
## vector but of type 'NULL', so ignored