一般數據結構與格式Part 1 阿舍&Oliver

在進入數據結構之前…

常見的R數據類型

integer(numeric)
double(numeric)
complex
character
logical

double & integer (numeric)

辨識資料類型：typeof()(較新) / mode()(較舊)
R 自動將數字視為double precision real numbers
如果要純整數，數字後要加L

typeof(2)

## [1] "double"

typeof(2L)

## [1] "integer"

double & integer (numeric)

typeof(Inf)  #Inf 無限大也是數值

## [1] "double"

typeof(NaN)  #not a number #數學運算中產生的無解

## [1] "double"

(0/0)

## [1] NaN

(sqrt(-25))  #sqrt()開根號

## [1] NaN

double & integer (numeric)

mode()辨識度不如typeof()細緻

mode(2)

## [1] "numeric"

mode(2L)

## [1] "numeric"

double & integer (numeric)

Integer vectors exist so that data can be passed to C or Fortran code which expects them

(int <-5L)

## [1] 5

(double <- 5)

## [1] 5

眼見不為憑

Computers use finite-precision arithmetic! Every number you see is an approximation.

sqrt(2) ^ 2 == 2

## [1] FALSE

1/49 * 49 == 1

## [1] FALSE

眼見不為憑

dplyr 套件的 near()：在容許誤差範圍內比較兩個數值是否相等（使用前需先執行 library(dplyr)）

near(sqrt(2)^ 2,2)  #開根號

## [1] TRUE

near(1/49 * 49,1)

## [1] TRUE

眼見不為憑

R 處理數值的限制會因為電腦設備而有不同
以下兩個變數（.Machine 列表中的元素，並非 function）分別表示目前 R 所能處理的最大浮點數與最小的正規化（normalized）正浮點數

.Machine$double.xmax

## [1] 1.797693e+308

.Machine$double.xmin

## [1] 2.225074e-308

source：https://blog.gtwang.org/r/r-variables-and-workspace/

character

使用雙引號 "" 或單引號 ''（兩者皆可，建議統一使用雙引號）

typeof(2)

## [1] "double"

typeof("2")

## [1] "character"

typeof("yoyo")

## [1] "character"

logical

TRUE(or T) / FALSE (or F) / NA(Missing Value)

typeof(FALSE)

## [1] "logical"

typeof(T)

## [1] "logical"

typeof(NA)  #not available #預設類型

## [1] "logical"

NA(Missing values)

typeof(NULL) #就是自己

## [1] "NULL"

(c(1,2,NULL,4)) #本來無一物  何處惹塵埃

## [1] 1 2 4

(W <- c("one","two",NA,"four")) #挖了個坑 不知道裡面的東西是啥

## [1] "one"  "two"  NA     "four"

typeof(W[3]) #可強制轉化為其他類型

## [1] "character"

NA(Missing values)

is.na(NaN)  #數學運算中產生的遺失值

## [1] TRUE

is.nan(NA)  #NA可以是多種類型的遺失值

## [1] FALSE

NA(Missing values)

Missing values are “contagious”

NA > 5

## [1] NA

NA + 10

## [1] NA

NA / 2

## [1] NA

為何 NA == NA 的結果不是 TRUE（而是 NA）

Let x be Mary's age. We don't know how old she is.

x <- NA

Let y be John's age. We don't know how old he is.

y <- NA

Are John and Mary the same age?

x == y

## [1] NA

Because we don't know!

判斷是否為…

is.numeric(5)

## [1] TRUE

is.integer(5.2)

## [1] FALSE

is.character("Yo")

## [1] TRUE

is.logical(NaN)

## [1] FALSE

轉化資料類型

as.numeric('yo')  #強制變更過程中產生了 NA

## [1] NA

as.integer(5.8)

## [1] 5

as.character(FALSE)

## [1] "FALSE"

as.logical(7)  # 0 = FALSE / 非零數值=  TRUE

## [1] TRUE

資料結構

vector(向量)
matrix(矩陣)
data frame(資料框架)
list(列表)
array(陣列)

向量

x1 <- c("father","mother","brother","sister")
is.vector(x1)

## [1] TRUE

length(x1)

## [1] 4

x1[2] <- "father"
x1

## [1] "father"  "father"  "brother" "sister"

矩陣

A <- matrix(c(F,T), nrow = 1, ncol = 2)
A

##       [,1] [,2]
## [1,] FALSE TRUE

B <- matrix(1:6, nrow = 2, ncol = 3 ,byrow = T)
B

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6

class(B)

## [1] "matrix" "array"
（註：R 4.0.0 起矩陣同時繼承 "array"；R 3.x 只會顯示 "matrix"）

dim(B)

## [1] 2 3

length(B)

## [1] 6

資料框架

x1 <- c("father","mother","brother","sister")
x2 <- c("Canon","Pentax","Olympus","Nikon")
x3 <- c("gold","red","green","blue")
x4 <- c(2,1,1,2)
camera <- data.frame(member = x1, brand = x2, color = x3, amount = x4)
camera

##    member   brand color amount
## 1  father   Canon  gold      2
## 2  mother  Pentax   red      1
## 3 brother Olympus green      1
## 4  sister   Nikon  blue      2

檢視資料框架

camera$age <- sample(90,4)
camera$age

## [1] 49 11 76 41

dim(camera)

## [1] 4 5

檢視資料框架

nrow(camera)

## [1] 4

ncol(camera)

## [1] 5

檢視資料框架

camera

##    member   brand color amount age
## 1  father   Canon  gold      2  49
## 2  mother  Pentax   red      1  11
## 3 brother Olympus green      1  76
## 4  sister   Nikon  blue      2  41

rownames(camera)

## [1] "1" "2" "3" "4"

colnames(camera)

## [1] "member" "brand"  "color"  "amount" "age"

列表

list1 <- list(camera,A,B)
list1

## [[1]]
##    member   brand color amount age
## 1  father   Canon  gold      2  49
## 2  mother  Pentax   red      1  11
## 3 brother Olympus green      1  76
## 4  sister   Nikon  blue      2  41
## 
## [[2]]
##       [,1] [,2]
## [1,] FALSE TRUE
## 
## [[3]]
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6

列表

unlist(list1[2:3])

## [1] 0 1 1 4 2 5 3 6

列表

names(list1) <- c("camera","A","B")
list1

## $camera
##    member   brand color amount age
## 1  father   Canon  gold      2  49
## 2  mother  Pentax   red      1  11
## 3 brother Olympus green      1  76
## 4  sister   Nikon  blue      2  41
## 
## $A
##       [,1] [,2]
## [1,] FALSE TRUE
## 
## $B
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6

列表

list1$B

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6

list1[2]

## $A
##       [,1] [,2]
## [1,] FALSE TRUE

list1[[2]]

##       [,1] [,2]
## [1,] FALSE TRUE

陣列

array(0,dim = c(2:3))

##      [,1] [,2] [,3]
## [1,]    0    0    0
## [2,]    0    0    0

array(1:6,dim = c(2:3))

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

b <- 1:12
dim(b) <- c(2,2,3)

陣列

dimnames(b) <- list(c("r1","r2"),c("c1","c2"),c("A","B","C"))

陣列

b

## , , A
## 
##    c1 c2
## r1  1  3
## r2  2  4
## 
## , , B
## 
##    c1 c2
## r1  5  7
## r2  6  8
## 
## , , C
## 
##    c1 c2
## r1  9 11
## r2 10 12

陣列

b[,,1:2]

## , , A
## 
##    c1 c2
## r1  1  3
## r2  2  4
## 
## , , B
## 
##    c1 c2
## r1  5  7
## r2  6  8

Thank you!