dplyr
Load
  1. library(dplyr)
Create Data Frame
  1. emp.data <- data.frame(
  2. emp_id = c (1:5),
  3. emp_name = c("Rick","Dan","Michelle","Ryan","Gary"),
  4. salary = c(623.3,515.2,611.0,729.0,843.25),
  5. start_date = as.Date(c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11",
  6. "2015-03-27")),
  7. stringsAsFactors = FALSE
  8. )
%>%, pipe operator
  1. emp.data %>% select(emp_id, salary) %>% head(n=2L)
filter, select rows in a data.frame that fit one or more logical expressions
  1. filter(emp.data, salary > 700)
  2. filter(emp.data, emp_name %in% c('Rick', 'Dan'))
arrange, sort data.frame according to one or more columns
  1. arrange(emp.data, salary) # sort in ascending order (the default)
  2. arrange(emp.data, desc(salary)) # sort in descending order
select, select columns, or rename existing columns
  1. select(emp.data, emp_id, salary) # select specific columns
  2. select(emp.data, -salary) # select all columns except a specific column, use "-" operator
  3. select(emp.data, emp_id:salary) # select a range of columns by name, use the ":" (colon) operator
  4. select(emp.data, emp_id, sal = salary) # select specific columns and rename a column
  5. select(emp.data, one_of('emp_id', 'salary', 'start_date')) # select columns whose names are given as character strings
  6. select(emp.data, contains('emp')) # select columns whose names contain "emp"
  7. select(emp.data, starts_with('s')) # select columns whose name starts with "s"
  8. select(emp.data, ends_with('ry')) # select columns whose name ends with "ry"
rename, rename columns
  1. rename(emp.data, sal = salary) # rename column
distinct, select unique rows based on the content of one or more columns
  1. distinct(mtcars, cyl)
mutate, edit or add columns
  1. mutate(emp.data, annual = salary*12) # add column "annual"
transmute, like mutate, but keep only the columns named in the call
  1. transmute(emp.data, emp_id, salary, annual = salary*12) # keep columns "emp_id", "salary", "annual"
summarise, collapse columns into summary values (one row per group)
  1. summarise(emp.data, mean_salary = mean(salary))
group_by, split the data frame by some variable
  1. mtcars %>% group_by(cyl) %>% summarise(mean_mpg = mean(mpg))
sample_n, sample n rows from data.frame
  1. sample_n(emp.data, 4) # randomly select n entries from data frame
sample_frac, sample a given fraction of the rows from data.frame
  1. sample_frac(emp.data, 0.5)
do, apply an arbitrary R expression to a data.frame (or to each group of a grouped data.frame)
  1. result = do(mtcars, model = lm(mpg ~ wt, data = .))
  2. print(result$model)
Reference
  • dplyr Tutorial
  • Tutorial