dplyr
Load the dplyr package
library(dplyr)
		
Create Data Frame
# Sample employee table used by the examples in this file: one row per
# employee with an integer id, a character name, a numeric salary and a
# start date stored as class Date.
emp.data <- data.frame(
  emp_id     = seq_len(5L),
  emp_name   = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary     = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27")
  ),
  # Keeps emp_name as character on R < 4.0, where the default made factors.
  stringsAsFactors = FALSE
)
		
%>%, pipe operator
# Chain verbs left to right: the result of each step becomes the first
# argument of the next. Keep two columns, then show the first two rows.
emp.data %>%
  select(emp_id, salary) %>%
  head(n = 2L)
		
filter, keep the rows of a data.frame that satisfy one or more logical expressions
# Rows whose salary is greater than 700.
emp.data %>% filter(salary > 700)
# Rows whose emp_name matches any of the listed values.
emp.data %>% filter(emp_name %in% c("Rick", "Dan"))
		
arrange, sort data.frame according to one or more columns
# Ascending by salary (the default ordering).
emp.data %>% arrange(salary)
# Descending by salary: wrap the column in desc().
emp.data %>% arrange(desc(salary))
		
select, select columns, or rename existing columns
# Pick columns by listing them.
select(emp.data, emp_id, salary)
# Drop a column with the "-" operator; every other column is kept.
select(emp.data, -salary)
# Take a contiguous range of columns by name with the ":" (colon) operator.
select(emp.data, emp_id:salary)
# Rename while selecting, written as new_name = old_name.
select(emp.data, emp_id, sal = salary)
# Pick the columns named in a character vector. all_of() replaces the
# superseded one_of(); it raises an error if a listed column is missing.
select(emp.data, all_of(c("emp_id", "salary", "start_date")))
# Columns whose name contains "emp".
select(emp.data, contains("emp"))
# Columns whose name starts with "s".
select(emp.data, starts_with("s"))
# Columns whose name ends with "ry".
select(emp.data, ends_with("ry"))
		
rename, rename columns
# Rename with new_name = old_name; unlike select(), all columns are kept.
emp.data %>% rename(sal = salary)
		
distinct, select unique rows based on the content of one or more columns
# Unique values of cyl in the built-in mtcars data set; only the cyl
# column appears in the result.
mtcars %>% distinct(cyl)
		
mutate, edit or add columns
# Append a derived column "annual" (salary times 12); the existing
# columns are kept.
emp.data %>% mutate(annual = salary * 12)
		
transmute, like mutate, but keep only the columns that are mentioned in the function call
# The result has exactly three columns: emp_id, salary and the new annual.
emp.data %>% transmute(emp_id, salary, annual = salary * 12)
		
summarise, reduce columns to summary values (one row for the whole table, or one row per group)
# Collapse the whole table to a single row holding the mean salary.
emp.data %>% summarise(mean_salary = mean(salary))
		
group_by, split the data frame by some variable
# Mean mpg per cylinder count: one output row for each distinct cyl value.
mtcars %>%
  group_by(cyl) %>%
  summarise(mean_mpg = mean(mpg))
		
sample_n, randomly sample n rows from a data.frame
# Draw 4 rows at random; the rows returned differ between runs unless a
# seed is set. Note: current dplyr marks sample_n() as superseded by
# slice_sample().
emp.data %>% sample_n(4)
		
sample_frac, randomly sample a given fraction of the rows of a data.frame
# Draw a random half of the rows; the rows returned differ between runs
# unless a seed is set. Note: current dplyr marks sample_frac() as
# superseded by slice_sample().
emp.data %>% sample_frac(0.5)
		
do, evaluate an arbitrary R expression on a data.frame (once per group if the data is grouped)
# Fit mpg ~ wt on the whole (ungrouped) mtcars table; "." stands for the
# data passed to do(). Because the argument is named, do() returns a
# one-row data frame whose list-column "model" holds the fitted lm object.
# Note: do() is superseded in current dplyr; it is kept here because it is
# the subject of this example.
result <- do(mtcars, model = lm(mpg ~ wt, data = .))
# Prints the list-column, i.e. a list containing the fitted model.
print(result$model)
		
Reference
  • dplyr Tutorial
  • Tutorial