Cap. 5 - Tidyverse

5.1 Reescreva as operações abaixo utilizando pipes %>%.

  • log10(cumsum(1:100))
  • sum(sqrt(abs(rnorm(100))))
  • sum(sort(sample(1:10, 10000, rep = TRUE)))

Solução:

library(tidyverse)

# Cumulative sum of 1..100, then the base-10 logarithm of each partial sum.
1:100 %>% cumsum() %>% log10()
#>   [1] 0.0000000 0.4771213 0.7781513 1.0000000 1.1760913
#>   [6] 1.3222193 1.4471580 1.5563025 1.6532125 1.7403627
#>  [11] 1.8195439 1.8920946 1.9590414 2.0211893 2.0791812
#>  [16] 2.1335389 2.1846914 2.2329961 2.2787536 2.3222193
#>  [21] 2.3636120 2.4031205 2.4409091 2.4771213 2.5118834
#>  [26] 2.5453071 2.5774918 2.6085260 2.6384893 2.6674530
#>  [31] 2.6954817 2.7226339 2.7489629 2.7745170 2.7993405
#>  [36] 2.8234742 2.8469553 2.8698182 2.8920946 2.9138139
#>  [41] 2.9350032 2.9556878 2.9758911 2.9956352 3.0149403
#>  [46] 3.0338257 3.0523091 3.0704073 3.0881361 3.1055102
#>  [51] 3.1225435 3.1392492 3.1556396 3.1717265 3.1875207
#>  [56] 3.2030329 3.2182729 3.2332500 3.2479733 3.2624511
#>  [61] 3.2766915 3.2907022 3.3044905 3.3180633 3.3314273
#>  [66] 3.3445887 3.3575537 3.3703280 3.3829171 3.3953264
#>  [71] 3.4075608 3.4196254 3.4315246 3.4432630 3.4548449
#>  [76] 3.4662743 3.4775553 3.4886917 3.4996871 3.5105450
#>  [81] 3.5212689 3.5318619 3.5423274 3.5526682 3.5628874
#>  [86] 3.5729877 3.5829719 3.5928427 3.6026025 3.6122539
#>  [91] 3.6217992 3.6312408 3.6405808 3.6498215 3.6589648
#>  [96] 3.6680130 3.6769678 3.6858313 3.6946052 3.7032914

# 100 normal draws -> absolute value -> square root -> total.
# No seed is set in this snippet, so the printed total changes on every run.
rnorm(n = 100) %>% abs() %>% sqrt() %>% sum()
#> [1] 82.55207

# Draw 10,000 values from 1..10 with replacement, sort them and add them up.
# `replace` is written in full: `rep = TRUE` only works through partial
# argument matching. The draw is random, so the total changes between runs.
sample(1:10, 10000, replace = TRUE) %>%
    sort() %>%
    sum()
#> [1] 55176

5.2 Use as funções download.file() e unzip() para baixar e extrair o arquivo do data paper de médios e grandes mamíferos: ATLANTIC MAMMALS. Em seguida, importe para o R, usando a função readr::read_csv().

Solução:

library(tidyverse)
# Fetch the supplement archive of the data paper, unpack it and read the
# assemblages/sites table into `dp_lm`.
download.file(
    "https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2785&file=ecy2785-sup-0001-DataS1.zip",
    "ecy2785-sup-0001-DataS1.zip",
    mode = "wb"  # binary transfer, so the zip archive is written unaltered
)

# No `exdir` is given, so the files are extracted with unzip()'s default.
unzip(zipfile = "ecy2785-sup-0001-DataS1.zip")

# NOTE(review): the file name contains a space before "_assemblages" --
# presumably it matches the name inside the archive; confirm before changing.
dp_lm <- readr::read_csv(file = "ATLANTIC_MAMMAL_MID_LARGE _assemblages_and_sites.csv")

5.3 Use a função tibble::glimpse() para ter uma noção geral dos dados importados no item anterior.

Solução:

library(tidyverse)
# Column-wise overview of the imported table (name, type and first values of
# each column), called through tibble:: as the exercise statement asks.
tibble::glimpse(dp_lm)
#> Rows: 4,680
#> Columns: 40
#> $ ID                     <chr> "AML01", "AML01", "AML01", …
#> $ Reference_paper_number <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ Country                <chr> "Brazil", "Brazil", "Brazil…
#> $ State                  <chr> "rio_grande_do_sul", "rio_g…
#> $ Municipality           <chr> "Sinimbu", "Sinimbu", "Sini…
#> $ Study_location         <chr> "Reserva Particular do Patr…
#> $ Latitude               <dbl> -29.38333, -29.38333, -29.3…
#> $ Longitude              <dbl> -52.53333, -52.53333, -52.5…
#> $ Precision              <chr> "not_precise", "not_precise…
#> $ Size_ha                <chr> "221", "221", "221", "221",…
#> $ Temperature            <chr> "18", "18", "18", "18", "18…
#> $ Altitude               <chr> "150-650", "150-650", "150-…
#> $ Annual_rainfall        <chr> NA, NA, NA, NA, NA, NA, NA,…
#> $ Vegetation_type        <chr> "Semideciduous forest", "Se…
#> $ Protect_area           <chr> "yes", "yes", "yes", "yes",…
#> $ Matrix                 <chr> NA, NA, NA, NA, NA, NA, NA,…
#> $ Reference              <chr> "Abreu-Junior, E.F. and Koh…
#> $ Publication_year       <dbl> 2009, 2009, 2009, 2009, 200…
#> $ Type_of_publication    <chr> "Article", "Article", "Arti…
#> $ Month_start            <chr> "November", "November", "No…
#> $ Year_start             <dbl> 2007, 2007, 2007, 2007, 200…
#> $ Month_finish           <chr> "April", "April", "April", …
#> $ Year_finish            <dbl> 2009, 2009, 2009, 2009, 200…
#> $ Total_of_months        <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, …
#> $ Sampling_habitat       <chr> "Interior", "Interior", "In…
#> $ Effort                 <dbl> 109.00, 109.00, 109.00, 109…
#> $ Effort_method          <chr> "camera_days", "camera_days…
#> $ Method                 <chr> "mixed_method", "mixed_meth…
#> $ Order                  <chr> "Carnivora", "Rodentia", "C…
#> $ Genus_on_paper         <chr> "Cerdocyon", "Cuniculus", "…
#> $ Species_name_on_paper  <chr> "Cerdocyon thous", "Cunicul…
#> $ Actual_species_Name    <chr> "Cerdocyon thous", "Cunicul…
#> $ Number_of_record       <chr> NA, NA, NA, NA, NA, NA, NA,…
#> $ `Density(groups/km2)`  <dbl> NA, NA, NA, NA, NA, NA, NA,…
#> $ `Density(ind/km2)`     <chr> NA, NA, NA, NA, NA, NA, NA,…
#> $ `Density(ind/km10)`    <dbl> NA, NA, NA, NA, NA, NA, NA,…
#> $ `Abundance(%)`         <dbl> NA, NA, NA, NA, NA, NA, NA,…
#> $ Abudance_relative      <dbl> NA, NA, NA, NA, NA, NA, NA,…
#> $ `Abundance(10/km)`     <dbl> NA, NA, NA, NA, NA, NA, NA,…
#> $ Voucher_Specimens      <chr> NA, NA, NA, NA, NA, NA, NA,…

5.4 Compare os dados de penguins (palmerpenguins::penguins_raw e palmerpenguins::penguins). Monte uma série de funções dos pacotes tidyr e dplyr para limpar os dados e fazer com que o primeiro dado seja igual ao segundo.

Solução:

library(tidyverse)
library(palmerpenguins)

# Print the raw dataset (344 rows x 17 columns in the output below).
penguins_raw
#> # A tibble: 344 × 17
#>    studyName `Sample Number` Species    Region Island Stage 
#>    <chr>               <dbl> <chr>      <chr>  <chr>  <chr> 
#>  1 PAL0708                 1 Adelie Pe… Anvers Torge… Adult…
#>  2 PAL0708                 2 Adelie Pe… Anvers Torge… Adult…
#>  3 PAL0708                 3 Adelie Pe… Anvers Torge… Adult…
#>  4 PAL0708                 4 Adelie Pe… Anvers Torge… Adult…
#>  5 PAL0708                 5 Adelie Pe… Anvers Torge… Adult…
#>  6 PAL0708                 6 Adelie Pe… Anvers Torge… Adult…
#>  7 PAL0708                 7 Adelie Pe… Anvers Torge… Adult…
#>  8 PAL0708                 8 Adelie Pe… Anvers Torge… Adult…
#>  9 PAL0708                 9 Adelie Pe… Anvers Torge… Adult…
#> 10 PAL0708                10 Adelie Pe… Anvers Torge… Adult…
#> # … with 334 more rows, and 11 more variables:
#> #   Individual ID <chr>, Clutch Completion <chr>,
#> #   Date Egg <date>, Culmen Length (mm) <dbl>,
#> #   Culmen Depth (mm) <dbl>, Flipper Length (mm) <dbl>,
#> #   Body Mass (g) <dbl>, Sex <chr>,
#> #   Delta 15 N (o/oo) <dbl>, Delta 13 C (o/oo) <dbl>,
#> #   Comments <chr>
# Print the cleaned dataset (344 rows x 8 columns in the output below); this
# is the target the cleaning pipeline has to reproduce.
penguins
#> # A tibble: 344 × 8
#>    species island    bill_length_mm bill_depth_mm
#>    <fct>   <fct>              <dbl>         <dbl>
#>  1 Adelie  Torgersen           39.1          18.7
#>  2 Adelie  Torgersen           39.5          17.4
#>  3 Adelie  Torgersen           40.3          18  
#>  4 Adelie  Torgersen           NA            NA  
#>  5 Adelie  Torgersen           36.7          19.3
#>  6 Adelie  Torgersen           39.3          20.6
#>  7 Adelie  Torgersen           38.9          17.8
#>  8 Adelie  Torgersen           39.2          19.6
#>  9 Adelie  Torgersen           34.1          18.1
#> 10 Adelie  Torgersen           42            20.2
#> # … with 334 more rows, and 4 more variables:
#> #   flipper_length_mm <int>, body_mass_g <int>, sex <fct>,
#> #   year <int>

# Rebuild `penguins` from `penguins_raw`: keep the eight matching columns,
# rename them, shorten the species label and convert the column types.
penguins_raw %>% 
    dplyr::select(Species, Island, `Culmen Length (mm)`:Sex, `Date Egg`) %>% 
    dplyr::rename(species = Species,
                  island = Island,
                  bill_length_mm = `Culmen Length (mm)`,
                  bill_depth_mm = `Culmen Depth (mm)`,
                  flipper_length_mm = `Flipper Length (mm)`,
                  body_mass_g = `Body Mass (g)`,
                  sex = Sex,
                  year = `Date Egg`) %>% 
    # keep only the first piece of the species label (e.g. "Adelie")
    tidyr::separate(species, c("species", NA, NA, NA, NA)) %>% 
    # match the column types printed for `penguins`: factors for species,
    # island and sex; integers for flipper length, body mass and year
    dplyr::mutate(species = as.factor(species),
                  island = as.factor(island),
                  flipper_length_mm = as.integer(flipper_length_mm),
                  body_mass_g = as.integer(body_mass_g),
                  sex = as.factor(stringr::str_to_lower(sex)),
                  year = as.integer(lubridate::year(year)))
#> # A tibble: 344 × 8
#>    species island    bill_length_mm bill_depth_mm
#>    <fct>   <fct>              <dbl>         <dbl>
#>  1 Adelie  Torgersen           39.1          18.7
#>  2 Adelie  Torgersen           39.5          17.4
#>  3 Adelie  Torgersen           40.3          18  
#>  4 Adelie  Torgersen           NA            NA  
#>  5 Adelie  Torgersen           36.7          19.3
#>  6 Adelie  Torgersen           39.3          20.6
#>  7 Adelie  Torgersen           38.9          17.8
#>  8 Adelie  Torgersen           39.2          19.6
#>  9 Adelie  Torgersen           34.1          18.1
#> 10 Adelie  Torgersen           42            20.2
#> # … with 334 more rows, and 4 more variables:
#> #   flipper_length_mm <int>, body_mass_g <int>, sex <fct>,
#> #   year <int>

5.5 Usando os dados de penguins (palmerpenguins::penguins), calcule a correlação de Pearson entre comprimento e profundidade do bico para cada espécie e para todas as espécies. Compare os índices de correlação para exemplificar o Paradoxo de Simpson.

Solução:

library(tidyverse)
library(palmerpenguins)

# Correlation between bill length and bill depth with all species pooled;
# cor() uses its default method, and use = "na.or.complete" controls how rows
# with missing values are handled.
with(penguins, cor(bill_length_mm, bill_depth_mm, use = "na.or.complete"))
#> [1] -0.2350529

# Same correlation computed separately per species: split the data into one
# data frame per species, then compute one correlation per data frame.
# The result is an unnamed list; NOTE(review): elements presumably follow the
# order of the species groups -- confirm with levels(penguins$species).
penguins %>%
    dplyr::group_split(species) %>%
    purrr::map(function(dados) {
        cor(dados$bill_length_mm, dados$bill_depth_mm, use = "na.or.complete")
    })
#> [[1]]
#> [1] 0.3914917
#> 
#> [[2]]
#> [1] 0.6535362
#> 
#> [[3]]
#> [1] 0.6433839

5.6 Oficialmente a pandemia de COVID-19 começou no Brasil com o primeiro caso no dia 26 de fevereiro de 2020. Calcule quantos anos, meses e dias se passaram desde então. Calcule também quanto tempo se passou até você ser vacinado.

Solução:

# Interval from the first COVID-19 case in Brazil (26 Feb 2020) to the day
# the script is run.
covid_inicio_br <- lubridate::dmy("26-02-2020")
intervalo_covid <- lubridate::interval(start = covid_inicio_br,
                                       end = lubridate::today())

# Interval from the first case to the vaccination date.
vacina <- lubridate::dmy("20-07-2021")
intervalo_vacina <- lubridate::interval(start = covid_inicio_br, end = vacina)

# Express both intervals as periods (years, months, days, ...).
# The first result depends on the current date, so it changes between runs.
lubridate::as.period(intervalo_covid)
#> [1] "1y 11m 18d 0H 0M 0S"
lubridate::as.period(intervalo_vacina)
#> [1] "1y 4m 24d 0H 0M 0S"