Data Frame
  • A data frame is a table: each column is a vector of the same length, and different columns may hold different types
  • Create Data Frame
    # Build an employee table: one equal-length vector per column.
    emp.data <- data.frame(
      emp_id = 1:5,
      emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
      salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
      start_date = as.Date(c(
        "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
      )),
      stringsAsFactors = FALSE # keep emp_name as character, not factor
    )

    print(emp.data)
    str(emp.data) # compact structure: each column's name, type and first values
    print(summary(emp.data)) # per-column summary
    		
    Accessing
    # Sample employee table used to demonstrate the access forms below.
    emp.data <- data.frame(
      emp_id = 1:5,
      emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
      salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
      start_date = as.Date(c(
        "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
      )),
      stringsAsFactors = FALSE
    )

    print(emp.data$emp_name) # `$` extracts one column as a plain vector
    print(emp.data[1, ]) # row index with empty column index: a one-row data frame
    print(emp.data[1, 2]) # [row, column]: the single value in row 1, column 2
    		
    Operations
    # Five-employee sample table that the operations below modify step by step.
    emp.data <- data.frame(
      emp_id = 1:5,
      emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
      salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
      start_date = as.Date(c(
        "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
      )),
      stringsAsFactors = FALSE
    )

    print(names(emp.data)) # column names
    print(dim(emp.data)) # c(number of rows, number of columns)

    # add a column: the new vector supplies one value per existing row
    emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
    print(emp.data)

    # remove a column: keep every column whose name is not "dept".
    # drop = FALSE keeps the result a data frame even if only one column is left.
    emp.data <- emp.data[, names(emp.data) != "dept", drop = FALSE]
    print(emp.data)

    # add a row: build a one-row data frame with the same columns, then rbind
    new.emp.data <- data.frame(
      emp_id = 6,
      emp_name = "Mike",
      salary = 700,
      start_date = as.Date("2010-01-01"),
      stringsAsFactors = FALSE
    )
    emp.data <- rbind(emp.data, new.emp.data)
    print(emp.data)

    # remove a row: a negative index drops row 3. The result is only printed,
    # so emp.data itself is unchanged. Unlike c(1:2, 4:nrow(emp.data)), this
    # stays correct when the table has fewer than four rows (4:3 counts down).
    print(emp.data[-3, ])
    
    # add a derived column with dplyr: yearly salary from the monthly figure
    library(dplyr)
    emp.data <- mutate(emp.data, salary_year = salary * 12)
    print(emp.data)

    # filter: keep only the rows whose salary is strictly greater than 700
    emp.data <- filter(emp.data, salary > 700)
    print(emp.data)