Sample Code: R

Due by 11:59 PM on Sunday, December 31, 0000

Elements of R

library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0           ✔ purrr   0.2.5      
## ✔ tibble  2.0.99.9000     ✔ dplyr   0.8.0.9000 
## ✔ tidyr   0.8.2           ✔ stringr 1.3.1      
## ✔ readr   1.3.1           ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()  masks stats::filter()
## ✖ purrr::is_null() masks testthat::is_null()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ dplyr::matches() masks testthat::matches()
library(socviz)
## 
## Attaching package: 'socviz'
## The following object is masked from 'package:kjhutils':
## 
##     %nin%
# c() combines its arguments into a vector; with no assignment the result
# is only printed.
c(1, 2, 3, 1, 3, 5, 25)
## [1]  1  2  3  1  3  5 25
# Assign vectors to names with <- so they can be reused below.
my_numbers <- c(1, 2, 3, 1, 3, 5, 25)

your_numbers <- c(5, 31, 71, 1, 3, 21, 6)
# Typing an object's name prints its contents.
my_numbers
## [1]  1  2  3  1  3  5 25
# Deliberate mistake: mean() has no default for x, so calling it with no
# arguments fails with the error shown below.
mean()

# Error in mean.default() : argument "x" is missing, with no default
# Supplying the x argument by name works.
mean(x = my_numbers)
## [1] 5.714286
mean(x = your_numbers)
## [1] 19.71429
# The result of a function can be stored in an object as well; nothing is
# printed when it is assigned.
my_summary <- summary(my_numbers)
# Count how many times each distinct value occurs.
table(my_numbers)
## my_numbers
##  1  2  3  5 25 
##  2  1  2  1  1
# Standard deviation of the vector.
sd(my_numbers)
## [1] 8.616153
# Arithmetic is element-wise: a single number is applied to every element,
# and two vectors of the same length are combined position by position.
my_numbers * 5
## [1]   5  10  15   5  15  25 125
my_numbers + 1
## [1]  2  3  4  2  4  6 26
my_numbers + my_numbers
## [1]  2  4  6  2  6 10 50
# Every object has a class: a plain vector, a stored summary, and even a
# function itself.
class(my_numbers)
## [1] "numeric"
class(my_summary)
## [1] "summaryDefault" "table"
class(summary)
## [1] "function"
# titanic is not defined in this document; presumably it is a dataset
# supplied by the socviz package loaded above -- confirm.
titanic
##       fate    sex    n percent
## 1 perished   male 1364    62.0
## 2 perished female  126     5.7
## 3 survived   male  367    16.7
## 4 survived female  344    15.6
class(titanic)
## [1] "data.frame"
# Convert the data frame to a tibble; the printed tibble also reports its
# dimensions and the type of each column.
titanic_tb <- as_tibble(titanic)
titanic_tb
## # A tibble: 4 x 4
##   fate     sex        n percent
##   <fct>    <fct>  <dbl>   <dbl>
## 1 perished male    1364    62  
## 2 perished female   126     5.7
## 3 survived male     367    16.7
## 4 survived female   344    15.6
# str() gives a compact view of an object's internal structure.
str(my_numbers)
##  num [1:7] 1 2 3 1 3 5 25
str(my_summary)
##  'summaryDefault' Named num [1:6] 1 1.5 3 5.71 4 ...
##  - attr(*, "names")= chr [1:6] "Min." "1st Qu." "Median" "Mean" ...

The CQ data

# Set echo = TRUE as the default knitr chunk option for the chunks that
# follow, so their source code is shown alongside the output.
knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)
library(janitor)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(socviz)
#' Calculate age in years, months, or days
#'
#' Builds the interval from `birth_date` to `ref_date`, converts it to a
#' lubridate period expressed in `unit`, and returns that period's
#' component for the same unit.
#'
#' @param birth_date Start of the interval, passed to
#'   `lubridate::interval()`.
#' @param ref_date End of the interval. Defaults to today (`Sys.Date()`).
#' @param unit A single string: `"year"`, `"month"`, or `"day"`.
#' @return The year, month, or day component of the period between
#'   `birth_date` and `ref_date`.
calc_age <- function(birth_date, ref_date = Sys.Date(), unit = "year") {

    # Validate before doing any work. The previous fallback was written as
    # `stop = "..."`, which is just another named switch() alternative, so
    # an unsupported unit never raised this error and could fall through
    # switch() and silently return NULL.
    valid_units <- c("year", "month", "day")
    if (!is.character(unit) || length(unit) != 1L || !unit %in% valid_units) {
        stop("Unknown time unit. Choose year, month, or day.", call. = FALSE)
    }

    # Namespace-qualified calls fail loudly if lubridate is unavailable,
    # unlike require(), which only returns FALSE.
    period <- lubridate::as.period(lubridate::interval(birth_date, ref_date),
                                   unit = unit)

    switch(unit,
           year = lubridate::year(period),
           month = lubridate::month(period),
           day = lubridate::day(period))

}

Load it

# Collect the CSV files under data/clean, with their full relative paths.
# `pattern` is a regular expression, not a shell glob, so anchor on a
# literal ".csv" at the end of the file name.
filenames <- dir(path = "data/clean",
                 pattern = "\\.csv$",
                 full.names = TRUE)

filenames
##  [1] "data/clean/01_79_congress.csv"  "data/clean/02_80_congress.csv" 
##  [3] "data/clean/03_81_congress.csv"  "data/clean/04_82_congress.csv" 
##  [5] "data/clean/05_83_congress.csv"  "data/clean/06_84_congress.csv" 
##  [7] "data/clean/07_85_congress.csv"  "data/clean/08_86_congress.csv" 
##  [9] "data/clean/09_87_congress.csv"  "data/clean/10_88_congress.csv" 
## [11] "data/clean/11_89_congress.csv"  "data/clean/12_90_congress.csv" 
## [13] "data/clean/13_91_congress.csv"  "data/clean/14_92_congress.csv" 
## [15] "data/clean/15_93_congress.csv"  "data/clean/16_94_congress.csv" 
## [17] "data/clean/17_95_congress.csv"  "data/clean/18_96_congress.csv" 
## [19] "data/clean/19_97_congress.csv"  "data/clean/20_98_congress.csv" 
## [21] "data/clean/21_99_congress.csv"  "data/clean/22_100_congress.csv"
## [23] "data/clean/23_101_congress.csv" "data/clean/24_102_congress.csv"
## [25] "data/clean/25_103_congress.csv" "data/clean/26_104_congress.csv"
## [27] "data/clean/27_105_congress.csv" "data/clean/28_106_congress.csv"
## [29] "data/clean/29_107_congress.csv" "data/clean/30_108_congress.csv"
## [31] "data/clean/31_109_congress.csv" "data/clean/32_110_congress.csv"
## [33] "data/clean/33_111_congress.csv" "data/clean/34_112_congress.csv"
## [35] "data/clean/35_113_congress.csv" "data/clean/36_114_congress.csv"
## [37] "data/clean/37_115_congress.csv" "data/clean/38_116_congress.csv"
# Derive a column specification from the first file only; it is reused
# below as col_types so that every file is parsed with the same types.
specs <- spec_csv(filenames[1])
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
specs
## cols(
##   Last = col_character(),
##   First = col_character(),
##   Middle = col_character(),
##   Suffix = col_character(),
##   Nickname = col_character(),
##   Born = col_character(),
##   Death = col_character(),
##   Sex = col_character(),
##   Position = col_character(),
##   Party = col_character(),
##   State = col_character(),
##   District = col_character(),
##   Start = col_character(),
##   End = col_character(),
##   Religion = col_character(),
##   Race = col_character(),
##   `Educational Attainment` = col_character(),
##   JobType1 = col_character(),
##   JobType2 = col_character(),
##   JobType3 = col_character(),
##   JobType4 = col_character(),
##   JobType5 = col_character(),
##   Mil1 = col_character(),
##   Mil2 = col_character(),
##   Mil3 = col_character()
## )
# Read every file with the shared column specification and stack the rows
# into one table; `.id` adds a `congress` column identifying the source
# file of each row.
data <- map_dfr(filenames, read_csv, .id = "congress", col_types = specs)

# Standardize the column names.
data <- data %>%
    clean_names()

data
## # A tibble: 20,580 x 26
##    congress last  first middle suffix nickname born  death sex   position
##    <chr>    <chr> <chr> <chr>  <chr>  <chr>    <chr> <chr> <chr> <chr>   
##  1 1        Aber… Thom… Gerst… <NA>   <NA>     05/1… 01/2… M     U.S. Re…
##  2 1        Adams Sher… <NA>   <NA>   <NA>     01/0… 10/2… M     U.S. Re…
##  3 1        Aiken Geor… David  <NA>   <NA>     08/2… 11/1… M     U.S. Se…
##  4 1        Allen Asa   Leona… <NA>   <NA>     01/0… 01/0… M     U.S. Re…
##  5 1        Allen Leo   Elwood <NA>   <NA>     10/0… 01/1… M     U.S. Re…
##  6 1        Almo… J.    Linds… Jr.    <NA>     06/1… 04/1… M     U.S. Re…
##  7 1        Ande… Herm… Carl   <NA>   <NA>     01/2… 07/2… M     U.S. Re…
##  8 1        Ande… Clin… Presba <NA>   <NA>     10/2… 11/1… M     U.S. Re…
##  9 1        Ande… John  Zuing… <NA>   <NA>     03/2… 02/0… M     U.S. Re…
## 10 1        Andr… Augu… Herman <NA>   <NA>     10/1… 01/1… M     U.S. Re…
## # … with 20,570 more rows, and 16 more variables: party <chr>,
## #   state <chr>, district <chr>, start <chr>, end <chr>, religion <chr>,
## #   race <chr>, educational_attainment <chr>, job_type1 <chr>,
## #   job_type2 <chr>, job_type3 <chr>, job_type4 <chr>, job_type5 <chr>,
## #   mil1 <chr>, mil2 <chr>, mil3 <chr>