dplyr
Load, attach the package before using its functions
- library(dplyr)
-
Create Data Frame
- emp.data <- data.frame(
- emp_id = c (1:5),
- emp_name = c("Rick","Dan","Michelle","Ryan","Gary"),
- salary = c(623.3,515.2,611.0,729.0,843.25),
-
- start_date = as.Date(c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11",
- "2015-03-27")),
- stringsAsFactors = FALSE
- )
-
%>%, pipe operator, passes the value on its left as the first argument of the function on its right
- emp.data %>% select(emp_id, salary) %>% head(n=2L)
-
filter, select rows in a data.frame that fit one or more logical expressions
- filter(emp.data, salary > 700)
- filter(emp.data, emp_name %in% c('Rick', 'Dan'))
-
arrange, sort data.frame according to one or more columns
- arrange(emp.data, salary) # sorted in ascending order
- arrange(emp.data, desc(salary)) # sorted in descending order
-
select, select columns, or rename existing columns
- select(emp.data, emp_id, salary) # select specific columns
- select(emp.data, -salary) # select all columns except a specific column, use "-" operator
- select(emp.data, emp_id:salary) # select a range of columns by name, use the ":" (colon) operator
- select(emp.data, emp_id, sal = salary) # select specific columns and rename a column
- select(emp.data, one_of('emp_id', 'salary', 'start_date')) # select columns by names given as strings (one_of() is superseded by all_of()/any_of())
- select(emp.data, contains('emp')) # select columns whose names contain "emp"
- select(emp.data, starts_with('s')) # select columns whose name starts with "s"
- select(emp.data, ends_with('ry')) # select columns whose name ends with "ry"
-
rename, rename columns
- rename(emp.data, sal = salary) # rename column
-
distinct, select unique rows based on the content of one or more columns
- distinct(mtcars, cyl) # keep one row per unique value of column "cyl"
-
mutate, edit or add columns
- mutate(emp.data, annual = salary*12) # add column "annual"
-
transmute, like mutate, but keep only the columns mentioned in the call
- transmute(emp.data, emp_id, salary, annual = salary*12) # keep columns "emp_id", "salary", "annual"
-
summarise, collapse columns into summary values (one row per group)
- summarise(emp.data, mean_salary = mean(salary))
-
group_by, group the data frame by one or more variables so that later verbs operate on each group
- mtcars %>% group_by(cyl) %>% summarise(mean_mpg = mean(mpg))
-
sample_n, sample n rows from data.frame
- sample_n(emp.data, 4) # randomly select 4 rows from the data frame
-
sample_frac, sample a given fraction of the rows
- sample_frac(emp.data, 0.5)
-
do, apply an arbitrary R expression to the data frame (or to each group), where "." refers to the current data
- result = do(mtcars, model = lm(mpg ~ wt, data = .))
- print(result$model)
-
Reference