• R/O
  • SSH

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Revision: 58bdda60df9632ec816ae58dd167966cd2869f88 (tree)
Time: 2024-08-30 17:13:08
Author: Lorenzo Isella <lorenzo.isella@gmai...>
Committer: Lorenzo Isella

Log Message

I added an automatic exploratory data analysis.

Change Summary

Incremental Difference

diff -r 57999279bcb6 -r 58bdda60df96 R-codes/clean_scoreboard.R
--- a/R-codes/clean_scoreboard.R Thu Aug 29 11:22:43 2024 +0200
+++ b/R-codes/clean_scoreboard.R Fri Aug 30 10:13:08 2024 +0200
@@ -1,23 +1,28 @@
11 rm(list=ls())
2-## last saved on Time-stamp: "2024-01-12 10:43:01 lorenzo"
2+## last saved on Time-stamp: "2024-08-30 10:09:30 lorenzo"
33
44
55 library(tidyverse)
66 library(janitor)
77 library(openxlsx)
8-
8+library(DataExplorer)
99
1010
1111 source("/home/lorenzo/myprojects-hg/R-codes/stat_lib.R")
1212
13-df<- read.xlsx("../input/scb_data_for_figures_2-22_top.xlsx") |>
14- as_tibble() |>
13+## df<- read.xlsx("../input/scb_data_for_figures-24-01-2024.xlsx") |>
14+## as_tibble() |>
15+## clean_names()
16+## ## clean_data()
17+
18+df<- read_csv("../input/scb_data_for_figures_LI.zip") |>
19+ ## as_tibble() |>
1520 clean_names()
16- ## clean_data()
1721
18-saveRDS(df, "scoreboard.RDS")
1922
20-write_tsv(df, "scoreboard.tsv.gz")
23+saveRDS(df, "../input/scoreboard.RDS")
24+
25+## write_tsv(df, "scoreboard.tsv.gz")
2126
2227 ## df_summary <- df %>%
2328 ## group_by(year, member_state) %>%
@@ -29,4 +34,36 @@
2934
3035 ## saveRDS(df_summary, "scoreboard_aggregated_expenditure.RDS")
3136
37+
38+
39+
40+config <- list(
41+ "introduce" = list(),
42+ "plot_intro" = list(),
43+ "plot_str" = list(
44+ "type" = "diagonal",
45+ "fontSize" = 35,
46+ "width" = 1000,
47+ "margin" = list("left" = 350, "right" = 250)
48+ ),
49+ "plot_missing" = list(),
50+ "plot_histogram" = list(),
51+ "plot_density" = list(),
52+ "plot_qq" = list(sampled_rows = 1000L),
53+ "plot_bar" = list(),
54+ "plot_correlation" = list("cor_args" = list("use" = "pairwise.complete.obs")),
55+ "plot_prcomp" = list(),
56+ "plot_boxplot" = list(),
57+ "plot_scatterplot" = list(sampled_rows = 1000L)
58+)
59+
60+
61+create_report(df ,output_file = "scoreboard_report.html",
62+ output_dir="../output" ,
63+ config = config
64+ )
65+
66+
67+
68+
3269 print("So far so good")