• R/O
  • SSH

Tags
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

File Info

Rev. b0f9052445f388d368bbf5d07343d0aa202d43a7
크기 15,377 bytes
Time 2022-02-17 01:05:51
Author Lorenzo Isella
Log Message

Another flipbook but for the covid tracker -- work in progress!

Content

---
title: "Covid Tracker Data Processing"
subtitle: "A Step-by-step Guide"
author: "Lorenzo Isella"
output:
  xaringan::moon_reader:
    lib_dir: libs
    css: [default, hygge, ninjutsu]
    nature:
      ratio: 16:10
      highlightStyle: github
      highlightLines: true
      countIncrementalSlides: false
---

```{r setup, include = FALSE}
library(flipbookr)
library(tidyverse)
knitr::opts_chunk$set(fig.width = 6, message = FALSE, 
                      warning = FALSE, comment = "", 
                      cache = F)
```

```{css, eval = TRUE, echo = FALSE}
.remark-code{line-height: 1.5; font-size: 80%}
@media print {
  .has-continuation {
    display: block;
  }
}
```




```{r intro, include = FALSE}
library(tidyverse)
library(janitor)
library(lubridate)
library(openxlsx)
library(stringr)
library(formattable)
library(stringi)
library(kableExtra)
library(flextable)

source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
```
---
First we load the libraries and thus set up the working environment
---
`r chunk_reveal(chunk_name = "intro" ,
              title =  "###  Load the libraries and select some years")`
---
We set up vectors and tibbles with MS and years that we need to
process the data
---
```{r ms, include = FALSE}
exclude_ms <- c("UK")

ms_list <- c("AT",    "BE",    "BG" ,
                        "CY"  ,  "CZ" ,   "DE",
                        "DK" ,   "EE",    "EL" ,  
                        "ES",    "FI",    "FR"  ,
                        "HR" ,   "HU"  ,  "IE" ,
                        "IT" ,   "LT"  ,  "LU",   
                        "LV" ,   "MT",    "NL" ,
                        "PL" ,   "PT"  ,  "RO",
                        "SE"  ,  "SI"  ,  "SK")



year_focus <- c(2019,2020,2021)

table_generation <- 1 ## whether I should generate the tables or not

names <- tibble(iso2= c("AT",    "BE",    "BG" , "CY"  ,  "CZ" ,   "DE",
                        "DK" ,   "EE",    "EL" ,  "ES",    "FI",    "FR"  ,
                        "HR" ,   "HU"  ,  "IE" , "IT" ,   "LT"  ,  "LU",   
                        "LV" ,   "MT",    "NL" ,"PL" ,   "PT"  ,  "RO",
                        "SE"  ,  "SI"  ,  "SK", "UK" ,   "Total"),
country=c("Austria", "Belgium", "Bulgaria","Cyprus", "Czechia", "Germany",
       "Denmark","Estonia", "Greece","Spain","Finland", "France",
       "Croatia","Hungary", "Ireland", "Italy","Lithuania", "Luxembourg",
       "Latvia","Malta", "Netherlands","Poland","Portugal", "Romania",
       "Sweden","Slovenia", "Slovakia", "United Kingdom","Total EU"))
names%>% glimpse
```

---
`r chunk_reveal(chunk_name = "ms" ,
              title =  "###  Set up the lists of MS")`
---
Then create two vectors with the different spelling of the TF (with and
without colon) that we will need when we collect the stats on the TF
chapters.
---
```{r tf, include = FALSE}
tf_list <- c("TF1:", "TF2:", "TF3:",  "TF4:",
                       "TF6:", "TF7:", "TF8:",
             "TF9:", "TF10:", "TF11:", "TF12:",
             "TF13:", "TF14:")


tf_list2 <- c("TF1,", "TF2", "TF3",  "TF4",
                       "TF6", "TF7", "TF8",
             "TF9", "TF10", "TF11", "TF12",
             "TF13", "TF14")
tf_list
tf_list2

```
---
`r chunk_reveal(chunk_name = "tf" ,
              title =  "###  Set up the lists of for the TF")`
---
Now I read the amendments and decisions from the excel file
---
```{r ini_import, include = FALSE}
df_am_ini <- read.xlsx("./input/covid_tracker.xlsx", sheet="Amendments") %>%
    as_tibble()
df_am_ini
df_dec_ini <- read.xlsx("./input/covid_tracker.xlsx", sheet="Decisions") %>%
    as_tibble()
df_dec_ini%>% glimpse
```
---
`r chunk_reveal(chunk_name = "ini_import" ,
              title =  "###  Raw data for amendments and decisions")`
---
Clean the amendment and decision data and save them in an R friendly
format (RDS file) 
---
```{r clean_save, include = FALSE}
df_am <- df_am_ini %>%
    clean_data()  %>%     ### clean the amendments data
    filter(member_state_of_amendment %!in% exclude_ms)%>%
    pattern_to_na(":") %>%
    mutate(decision_date=if_else(is.na(decision_date_manually_encoded),
                                 decision_date_from_isis_not_accurate,
                                 decision_date_manually_encoded))
df_am%>% glimpse
saveRDS(df_am, "./intermediate_files/amendments_data.RDS")


amendments_decision_date <- df_am %>%
    select(amendment_case_number, decision_date) %>%
    distinct()


df_dec <- df_dec_ini %>%
    clean_data()  %>%  ### clean the decision data
    filter(member_state_code %!in% exclude_ms)%>%
    pattern_to_na(":")
df_dec%>% glimpse
saveRDS(df_dec,"./intermediate_files/decisions_data.RDS")

```
---
`r chunk_reveal(chunk_name = "clean_save" ,
              title =  "###  Clean and save amendments and decisions")`
---
Read the data from the report1 sheet, clean it and deduplicate it when necessary
in order to reproduce the sheet report1_unique. Save the cleaned data
both as an RDS and an Excel file.
---
```{r report, include = FALSE}
rep1_ini <- read.xlsx("./input/covid_tracker.xlsx", sheet="Report 1 - detailed view") %>%
    as_tibble()

rep1 <- rep1_ini %>%
    clean_data() %>%  ## clean the data of the full report
    distinct() %>% 
    filter(member_state_2_letter_code %!in% exclude_ms) %>%
    mutate(confirmed_budgets=as.numeric(confirmed_budgets)) %>%
    mutate(decision_date=as.integer(decision_date)) %>%
    mutate(number_of_measures_whitin_the_scheme=
               as.integer(number_of_measures_whitin_the_scheme)) %>%
    pattern_to_na(":")

rep1%>% glimpse
saveRDS(rep1,"./intermediate_files/report1_detailed.RDS")

rep1_unique <- rep1 %>%
    filter(phase_name %in% c("Notification", "Prenotification"),
           !is.na(decided)) %>% 
    distinct(case_reference, .keep_all=T)
rep1_unique%>% glimpse

saveRDS(rep1_unique,"./intermediate_files/report1_unique.RDS")
save_excel(rep1_unique, "./intermediate_files/data_budget.xlsx")

```
---
`r chunk_reveal(chunk_name = "report" ,
              title =  "###  Clean and save the report data")`
---
Now a bunch of statistics and aggregations concerning the budget
---
```{r budget1, include = FALSE}
budget_test <- rep1_unique %>%
    group_by(member_state_2_letter_code) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    ungroup %>%
    mutate(share=budget/sum(budget)) %>%
    add_total(nrow(.)+1) %>%
    filter(!is.na(member_state_2_letter_code))

budget_test%>% glimpse
saveRDS(budget_test, "./intermediate_files/budget_MS.RDS")

```
---
`r chunk_reveal(chunk_name = "budget1" ,
              title =  "###  Budget by MS and save the result")`
---
Budget for a selection of years in the vector year_focus
---
```{r budget2, include = FALSE}
budget_test_focus <- rep1_unique %>%
    mutate(decision_date=excel_numeric_to_date(decision_date)) %>%
    mutate(year=year(decision_date)) %>%
    filter(year %in% year_focus) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T))

budget_test_focus%>% glimpse
```
---
`r chunk_reveal(chunk_name = "budget2" ,
              title =  "### Total  Budget by for selected years")`
---
```{r budget3, include = FALSE}
budget_type <- rep1_unique %>%
    group_by(type_of_aid) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    ungroup %>%
    filter(complete.cases(.)) %>%
    mutate(share=budget/sum(budget)) %>%
    mutate(name=type_of_aid) %>% 
    arrange(desc(budget))%>%
    add_total(nrow(.)+1)

budget_type%>% glimpse

budget_type_ms <- rep1_unique %>%
    group_by( member_state_2_letter_code, type_of_aid) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    na.omit %>%
    mutate(share=budget/sum(budget, na.rm=T)) %>%
    group_modify(~ .x %>%
                     adorn_totals("row")) %>%
    ungroup %>%
    complete(member_state_2_letter_code, type_of_aid) %>%
    na_to_pattern(0) %>% 
    mutate(name=type_of_aid) %>% 
    na_to_pattern("Total") %>%
    arrange(member_state_2_letter_code,desc(budget))

budget_type_ms%>% glimpse 
```
---
`r chunk_reveal(chunk_name = "budget3" ,
              title =  "### Budget by tipe of aid and type of aid and MS")`
---
Budget by type of measure and measure and MS
---
```{r budget4, include = FALSE}
budget_measure <- rep1_unique %>%
    group_by(type_of_measure) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    ungroup %>%
    filter(complete.cases(.)) %>%
    mutate(share=budget/sum(budget))%>%
    add_total(nrow(.)+1)

budget_measure%>% glimpse

budget_measure_ms <- rep1_unique %>%
    group_by( member_state_2_letter_code,
             type_of_measure) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    na.omit %>%
    mutate(share=budget/sum(budget))%>%
    group_modify(~ .x %>%
                     adorn_totals("row"))  %>% 
    ungroup %>%
    complete(member_state_2_letter_code,
             type_of_measure) %>%
    na_to_pattern(0)%>%
    filter(type_of_measure !=" ", type_of_measure!=0)

budget_measure_ms%>% glimpse
```
---
`r chunk_reveal(chunk_name = "budget4" ,
              title =  "### Budget by tipe of measure and type of measure and MS")`
---
```{r budget5, include = FALSE}
budget_measure_case_ms <- rep1_unique %>%
    group_by( member_state_2_letter_code,case_reference,
             type_of_measure) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    na.omit %>%
    mutate(share=budget/sum(budget))%>%
    ungroup %>%
    complete(member_state_2_letter_code,
             type_of_measure) %>%
    na_num_to_pattern(0) %>%
    na_char_to_pattern("Missing")%>%
    filter(type_of_measure!=" ", type_of_measure!=0)

budget_measure_case_ms%>% glimpse
save_excel(budget_measure_case_ms, "./intermediate_files/budget_measure_case.xlsx")
 
```
---
`r chunk_reveal(chunk_name = "budget5" ,
              title =  "### Detailed breakdown of the budget and save")`
---
```{r budget6, include = FALSE}
budget_by_beneficiary_sector <-  rep1_unique %>%
    group_by(beneficary_sector) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    ungroup %>%
    filter(complete.cases(.)) %>%
    mutate(share=budget/sum(budget)) %>%
    arrange(desc(budget))%>%
    add_total(nrow(.)+1) %>%
    filter(beneficary_sector!=" ", beneficary_sector!=0)
budget_by_beneficiary_sector%>% glimpse

budget_by_beneficiary_sector_ms <-  rep1_unique %>%
    group_by(member_state_2_letter_code,beneficary_sector) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    na.omit %>%
    mutate(share=budget/sum(budget)) %>%
    group_modify(~ .x %>%adorn_totals("row"))  %>% 
    ungroup%>%
    complete(member_state_2_letter_code,beneficary_sector) %>%
    na_to_pattern(0) %>%
    filter(beneficary_sector!=" ", beneficary_sector!=0)
budget_by_beneficiary_sector_ms%>% glimpse

saveRDS(budget_by_beneficiary_sector_ms, "./intermediate_files/budget_sector_MS.RDS")

```
---
`r chunk_reveal(chunk_name = "budget6" ,
              title =  "### Budget by beneficiary sector and by beneficiary sector and MS")`
---
```{r budget7, include = FALSE}
budget_by_beneficiary_type <-  rep1_unique %>%
    group_by(beneficary_type) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    ungroup %>%
    filter(complete.cases(.)) %>%
    mutate(share=budget/sum(budget)) %>%
    arrange(desc(budget))%>%
    add_total(nrow(.)+1)%>%
    filter(beneficary_type!=" ", beneficary_type!=0)
budget_by_beneficiary_type%>% glimpse

budget_by_beneficiary_type_ms <-  rep1_unique %>%
    group_by(member_state_2_letter_code,beneficary_type) %>%
    summarise(budget=sum(confirmed_budgets, na.rm=T)) %>%
    na.omit %>%
    mutate(share=budget/sum(budget)) %>%
    group_modify(~ .x %>%
                     adorn_totals("row"))  %>% 
    ungroup%>%
    complete(member_state_2_letter_code,beneficary_type) %>%
    na_to_pattern(0)%>%
    filter(beneficary_type!=" ", beneficary_type!=0)

budget_by_beneficiary_type_ms%>% glimpse
saveRDS(budget_by_beneficiary_type_ms, "./intermediate_files/budget_type_MS.RDS")

```
---
`r chunk_reveal(chunk_name = "budget7" ,
              title =  "### Budget by beneficiary type and by beneficiary type and MS")`
---
```{r budget8, include = FALSE}
decisions_test <- rep1_unique %>%
    filter(decided=="YES")%>%
    group_by(member_state_2_letter_code) %>%
    summarise(n_decisions=n()) %>%
    ungroup() %>%
    arrange(member_state_2_letter_code)%>%
    add_total(nrow(.)+1)
decisions_test%>% glimpse

undecided_count <- rep1_unique %>%
    filter(!is.na(legal_basis),
           decided=="NO")
undecided_count%>% glimpse
save_excel(undecided_count, "./intermediate_files/undecided_cases.xlsx")

```
---
`r chunk_reveal(chunk_name = "budget8" ,
              title =  "### Statistics on the decisions by MS and save")`
---
```{r decisions1, include = FALSE}
decisions_eu <- rep1_unique %>%
    filter(decided=="YES")%>%
    group_by(legal_basis) %>%
    summarise(n_decisions=n()) %>%
    ungroup() %>%
    arrange(desc(n_decisions)) %>%
    mutate(share=n_decisions/sum(n_decisions))%>%
    add_total(nrow(.)+1)
decisions_eu %>% glimpse

decisions_eu_ms <- rep1_unique %>%
    filter(decided=="YES")%>%
    group_by( member_state_2_letter_code,legal_basis) %>%
    summarise(n_decisions=n()) %>%
    na.omit %>%
    mutate(share=n_decisions/sum(n_decisions))%>%
    group_modify(~ .x %>%
                     adorn_totals("row"))  %>% 
    ungroup() %>%
    complete(member_state_2_letter_code,legal_basis) %>%
    na_to_pattern(0) %>%
    filter(legal_basis!="0")
decisions_eu_ms %>% glimpse

saveRDS(decisions_eu_ms, "./intermediate_files/decisions_MS.RDS")
```
---
`r chunk_reveal(chunk_name = "decisions1" ,
              title =  "### Number decisions by MS and legal basis and save")`
---
```{r decisions2, include = FALSE}
decisions_eu_ms_time <- rep1_unique %>%
    filter(decided=="YES") %>%
    mutate(decision_date=excel_numeric_to_date(decision_date))  %>%
    select(case_reference,member_state_2_letter_code,
           decision_date,confirmed_budgets) %>%
    mutate(year=year(decision_date),
           month=month(decision_date)) %>%
    group_by(member_state_2_letter_code, year, month) %>%
    summarise(n_decisions=n(),
              budget=sum(confirmed_budgets)) %>%
    ungroup %>%
    mutate(date=paste(year, "-", month, sep="")) %>%
    mutate(date=ym(date)) %>%
    arrange(member_state_2_letter_code,date) %>%
    mutate(month_name=month.name[.$month]) %>%
    group_by(member_state_2_letter_code) %>%
    mutate(share_dec=n_decisions/sum(n_decisions)*100,
           share_budget=budget/sum(budget)*100) %>%
    mutate(share_dec=round_preserve_sum(share_dec,1),
           share_budget=round_preserve_sum(share_budget,1)) %>%
    ungroup %>%
    mutate(date_easy=paste(month_name, year, sep=" "))
decisions_eu_ms_time %>% glimpse    

saveRDS(decisions_eu_ms_time, "./intermediate_files/decisions_budget_time.RDS")

```
---
`r chunk_reveal(chunk_name = "decisions2" ,
              title =  "### Number decisions by MS along time and save")`
---
```{r amendments1, include = FALSE}
count_am <- df_am %>%
    group_by(member_state_of_amendment) %>%
    summarise(n_amendments=length(unique(amendment_case_number))) %>%
    ungroup%>%
    add_total(nrow(.)+1)
count_am %>% glimpse

saveRDS(count_am, "./intermediate_files/number_amendments.RDS")

```
---
`r chunk_reveal(chunk_name = "amendments1" ,
              title =  "### Number amendments by MS and save")`