Data Frame
A data frame is a table
- The column names should be non-empty
- The row names should be unique
- The data stored in a data frame can be of numeric, factor, or character type; other vector types, such as dates, are also allowed
- Each column should contain the same number of data items
Create Data Frame
# Build the employee table one named column at a time. Every column supplies
# five values, so the resulting data frame has five rows.
emp.data <- data.frame(
  emp_id = seq_len(5),
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27")
  ),
  stringsAsFactors = FALSE  # keep emp_name as character, not factor
)

print(emp.data)           # the whole table
str(emp.data)             # structure: column names, types and leading values
print(summary(emp.data))  # per-column summary
Accessing
# Rebuild the sample table so this section can be run on its own.
emp.data <- data.frame(
  emp_id = seq_len(5),
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27")
  ),
  stringsAsFactors = FALSE
)

# A single column, extracted as a plain vector.
print(emp.data[["emp_name"]])

# The first row; the result is still a data frame.
print(emp.data[1, ])

# One cell: row 1, column 2 (emp_name).
print(emp.data[1, 2])
Operations
# Rebuild the sample table so this section can be run on its own.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27")
  ),
  stringsAsFactors = FALSE
)

print(names(emp.data))  # column names
print(dim(emp.data))    # number of rows, number of columns

# Add a column: assign a vector holding one value per existing row.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
print(emp.data)

# Remove a column: keep every column whose name is not "dept".
# drop = FALSE keeps the result a data frame even when only one column is left.
emp.data <- emp.data[, names(emp.data) != "dept", drop = FALSE]
print(emp.data)

# Add a row: build a one-row data frame with the same columns, then rbind().
# emp_id is written as 6L so the id column stays integer instead of being
# coerced to double by the bind.
new.emp.data <- data.frame(
  emp_id = 6L,
  emp_name = "Mike",
  salary = 700,
  start_date = as.Date("2010-01-01"),
  stringsAsFactors = FALSE
)
emp.data <- rbind(emp.data, new.emp.data)
print(emp.data)

# Remove a row: a negative index drops row 3 regardless of how many rows the
# table has (a 4:nrow range would count backwards on a table shorter than 4).
# Only the printed copy lacks the row; emp.data itself is left unchanged.
print(emp.data[-3, ])
# Add a derived column with dplyr: salary_year is salary multiplied by 12.
library(dplyr)
emp.data <- mutate(emp.data, salary_year = salary * 12)
print(emp.data)

# Filter: keep only the rows whose salary is greater than 700.
emp.data <- filter(emp.data, salary > 700)
print(emp.data)