Ce document vise à rassembler les commandes utiles pour programmer en R.
library(jsonlite)
decp_Lambersart <- fromJSON(txt = "../data/decp/decp_acheteur.json", flatten = T)
decp_Lambersart <- as.data.frame(decp_Lambersart$marches) |>
mutate(titulaires = map(titulaires, ~ mutate(.x, id = as.character(id)))) |>
unnest(cols = c(titulaires))
library(XML)
library(httr)
data_19 <- xmlParse(content(GET("https://marchespublics596280.fr/app.php/api/v1/donnees-essentielles/contrat/xml-extraire-criteres/50286/a:1:%7Bi:0;i:0;%7D/1/2019/false/false/false/false/false/false/false/false/false", user_agent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36")), "text"),
encoding = "UTF-8")
xml_df_19 <- xmlToDataFrame(nodes = getNodeSet(data_19, "//marche")) |> mutate(Year = 2019)
library(haven)
data <- read_sas("../data/mon_fichier.sas7bdat")
library(data.table)
rbindlist_fread <- function(path, pattern = "*.csv") {
files = list.files(path, pattern, full.names = TRUE)
data.table::rbindlist(lapply(files, function(x) fread(x)))
}
data <- rbindlist_fread("mon/super/path")
library(utils)
download.file("lien/vers/zip.zip", "dossier_complet.zip")
unzip("dossier_complet.zip")
data <- read_delim("dossier_complet.csv", ";", trim_ws = TRUE)
library(googlesheets4)
data <- read_sheet("lien/vers/le/google/sheets")
# Attention : ne marche que si le tableau est en format GoogleSheet et pas Excel déposé sur Drive !!
parse_api_open_alex <- function(start, end){
# Import des données : Works dataset, appels de l'API
works_data <- purrr::map(
.x = dois_bso[start:end,]$doi,
.y = data.frame(matrix(ncol = 1, nrow = 1)),
possibly(.f = ~fromJSON(txt = paste("https://api.openalex.org/works/mailto:diane@datactivist.coop/doi:", .x, sep = ""), flatten = T), otherwise = NA_character_),
.default = NA)
# Aplatissement
# sélection des 2 variables qui nous intéressent
works_df <- purrr::map(
.x = works_data,
.y = data.frame(matrix(ncol = 1, nrow = 1)),
possibly(.f = ~unnest(data.frame( # on récupère chaque élément/variable qui nous intéresse, on les met dans un df
doi = .x$doi,
.x$authorships),
cols = "institutions", names_repair = "universal") |> select(doi, country_code), otherwise = NA_character_),
.default = NA)
# suppression des NA et mise au format tabulaire
works_df <- works_df[works_df != "NA"] # replace NA (DOIs non matchés avec OpenAlex) by NULL
works_df <- rrapply(works_df, condition = Negate(is.null), how = "prune") #remove NULL
works_df <- works_df |> bind_rows()
# Export du df
rio::export(works_df, glue("data/3.external/OpenAlex/french_CA/API_{start}_{end}.csv"))
}
### On applique la fonction pour 50 DOIs
parse_api_open_alex(1,50)
library(rvest)
content <- read_html("url")
body_table <- content |> html_nodes('body') |>
html_nodes('table') |>
html_table(dec = ",")
data <- body_table[[1]]
library(rvest)
library(tidyverse)
data <- purrr::map(
.x = (as.data.frame(rep(1:5, each = 1)) |> rename(page = `rep(1:5, each = 1)`))$page,
.y = data.frame(matrix(ncol = 1, nrow = 1)),
.f = ~read_html(paste0("http://portal.core.edu.au/conf-ranks/?search=&by=all&source=all&sort=atitle&page=", .x)) |> html_nodes('body') |> html_nodes('table') |> html_table(dec = ","),
.default = NA)
data <- bind_rows(data)
# Code valable en janvier 2023, site a évolué maintenant
library(htm2txt)
core_millesime <- purrr::map(
.x = (as.data.frame(rep(1:10, each = 1)) |> rename(page = `rep(1:10, each = 1)`))$page,
.y = data.frame(matrix(ncol = 1, nrow = 1)),
possibly(.f = ~ as.data.frame(gettxt(paste0('http://portal.core.edu.au/conf-ranks/', .x, '/'))) |> #import page par page
rename(text = 1) |>
mutate(text = strsplit(as.character(text), "\n")) |> unnest(text) |> #split les éléments séparés par des "\n"
filter(row_number() == 10 | #nom de conférence
grepl("Acronym:", text) == TRUE |
grepl("Source:", text) == TRUE |
grepl("Rank:", text) == TRUE, #champs que l'on garde
grepl("DBLP", text) == FALSE) |> #retrait de la ligne contenant ce string
mutate(text = case_when(row_number() == 1 ~ paste("Title:", text), TRUE ~ text), #ajout du préfixe "titre:"
champ = str_extract(text, "^[a-zA-Z0-9_]*"), #dans une nouvelle colonne ce qui est avant ":"
value = str_extract(text, "(?< = : )[^\n]*")) |> #dans une nouvelle colonne ce qui est après ": "
select(-text) |> t() |> row_to_names(row_number = 1) |> data.frame() |> #transpose puis 1ère ligne en nom de colonnes
pivot_longer(cols = -c(Title, Acronym), names_to = "number", values_to = "value", names_prefix = "Source|Rank") |> # format long pour rank et source quand multiples
mutate(col = case_when(row_number() %% 2 == 0 ~ "rank",
row_number() %% 2 == 1 ~ "source")) |> #
pivot_wider(names_from = col, values_from = value) |> select(-number) |> mutate(core_id = .x), otherwise = NA_character_),
.default = NA)
# Gestion des Na et mise au format tabulaire
core_histo <- core_millesime[core_millesime != "NA"] # replace NA by NULL
core_histo <- rrapply::rrapply(core_histo, condition = Negate(is.null), how = "prune") #remove NULL
core_histo <- core_histo |> bind_rows()
packages = c("tidyverse", "jsonlite", "glue", "parallel", "doParallel", "foreach")
package.check <- lapply(
packages,
FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE)
}
}
)
library(janitor)
data <- data |> clean_names() # retire majuscules, espaces et caractères spéciaux
library(janitor)
clean_some_names <- function(dat, idx, ...) {
names(dat)[idx] <- janitor::make_clean_names(names(dat)[idx], ...)
dat
}
data <- data |>
clean_some_names(14:18)
data <- data |> rename_at(vars(-Name, -State), ~ paste0(., '_2017'))
data <- data |> select(indicateurs, order(colnames(data)))
# Dans le cas où les niveaux changent, ex : "3 à 5 fois par semaine (12 commerces)"
table |>
mutate(discu_nb_commerces = factor(discu_nb_commerces,
levels = table |>
distinct(`Avez-vous l'habitude de discuter avec vos clients en dehors du cadre strict de la vente ? (exemple : prendre des nouvelles personnelles)`, discu_nb_commerces) %>%
arrange(factor(`Avez-vous l'habitude de discuter avec vos clients en dehors du cadre strict de la vente ? (exemple : prendre des nouvelles personnelles)`,
levels = c("Jamais", "Très rarement (moins d'une fois par mois)", "Rarement (1 à 2 fois par mois)", "Ponctuellement (1 à 2 fois par semaine)", "Régulièrement (3 à 5 fois par semaine)", "Très régulièrement (plusieurs fois par jour)"))) |>
pull(discu_nb_commerces)))
data <- data |>
group_by(`Référence commande`) |>
fill(everything(), .direction = "downup") |>
ungroup()
# NAs d'une colonne par 0
data <- data |> mutate(col = replace_na(col, 0))
# NAs du df entier par 0
data <- data |> mutate_all(replace_na, 0)
# NAs par string
data <- data |> replace(is.na(.), "unknown")
# NAs par valeurs autre colonne
data <- data |> mutate(col_NA = coalesce(col_NA, col_replace))
# Cellules vides par NAs
data <- data |> mutate_all(na_if, "")
# NULL par NAs
data <- data |> replace(. == "NULL", NA)
data[data == "null"] <- NA
# Chiffres négatifs par NAs
data <- data |> mutate(col = replace(col, which(col<0), NA))
# Valeur par NAs sur certaines colonnes
data <- data |> mutate(across(starts_with("Choix_"), ~ na_if(.x, "Pas de préférence")))
data <- data |> mutate_at(vars(var_3:var_17), ~round(.,0))
plyr::round_any(x, 5)
# Nombres arrondis au million
format(round(100000000 / 1e6, 1), trim = TRUE)
# Centaines et milliers séparés des virgules [comma]
format(as.integer(1000000, 0), nsmall = 1, big.mark = ".")
library(janitor)
data <- data |>
group_by(Var2) |>
group_modify(~ adorn_totals(.x, where = "row")) |>
ungroup() |>
arrange(fct_relevel(Commune, 'Total')) #puis trier avec ligne "Total" en haut puis ordre alphabétique sur colonne "Commune"
# Solving error "trying to re-add a totals dimension that is already been added"
data |>
untabyl() |> #ajouter avant le total
adorn_totals()
rep()
, seq()
c(rep(1:5570, each = 50), rep(5571, each = 7))
data <- data |>
group_by(defi_profil) |>
mutate(groupNbr = cur_group_id())
data <- mutate(date = format(as.Date(Date, format = "%Y-%m-%d %H:%M:%S"),"%d %B %Y")) #30 mai 2023
# autres formats : https://www.r-bloggers.com/2013/08/date-formats-in-r/
data <- data |> mutate(nb_weeks = round(as.numeric(difftime(fin, Sys.Date(), units = "weeks")), 0), #semaine
nb_month = round(as.numeric(difftime(fin, Sys.Date(), units = "weeks") /4.34524), 0)) #mois
library(mondate)
data <- data |> mutate(date_fin = as.mondate(date_debut) + duree) #duree en mois
data <- data |>
mutate(has_intersection = all(sapply(1:(n() - 1), function(i) {
all(pmax(min[i], min[(i + 1):n()]) <= pmin(max[i]+3, max[(i + 1):n()]+3))
})), .by = author_id) #où min est âge_min et max est age_max de la tranche
data <- data |> mutate(tranche_age = cut(age, c(18,20, seq(30, 90, 5), 98)))
data <- data |> mutate(age = round(as.numeric(difftime(Sys.Date(), dateOfBirth, units = "weeks")) / 52.1429, 0)) #année
# Valeurs d'une colonne
data <- data |> mutate(col = str_replace_all(col, c("pattern1 | pattern2" = "replacement")))
# Attention à escape les parenthèses pour que le remplacement fonctionne
data <- data |> mutate(col = str_replace_all(col, c("string avec \\(parenthèses\\)" = "replacement")))
# Valeurs du df entier
data <- data |> mutate_all(function(x) gsub("pattern1 | pattern2", "replacement", x))
data |> mutate_at(vars(January:December), ~str_replace(., ",", "."))
data <- data |> mutate(col = stringi::stri_trans_general(str = gsub("-", " ", toupper(string)), id = "Latin-ASCII"))
library(tools)
toTitleCase(tolower("MY STRING"))
toTitleCase("my other string")
gsub("^(\\w)(\\w+)", "\\U\\1\\L\\2", "my other string", perl = TRUE)
data <- data |> mutate(col = removeWords(string, c("IEEE ", "ACM ", "SIAM ")))
rem_dup_word <- function(x){
#x <- tolower(x)
paste(unique(trimws(unlist(strsplit(x, split = " ", fixed = F, perl = T)))), collapse = " ")
}
rem_dup_word(x)
data <- data |> mutate(col = gsub("\\W*\\b\\w\\b\\W*", " ", string))
data <- data |> mutate(col = trimws(string, which = "left"))
data <- data |> mutate(col = str_squish(col)) #specific column
data <- data |> mutate_all(~str_squish(.)) #all character columns
data <- data |> mutate(num = gsub("^0", "", num))
# Supprimer les caractères spéciaux ex : ? ' !
data <- data |> mutate(col = str_replace_all(col, "[^[:alnum:]]", " "))
data <- data |> mutate(first_word = word(string, 1))
library(strex)
data <- data |> mutate(min = str_nth_number(string, n = 1)) # extrait le 1er chiffre du string
data <- data |> mutate(annee = str_extract(`En quelle année ?`, "(1|2)\\d{3}")) #seulement "\\d{5}" pour zipCode
data <- data |> mutate(sub_string = substr(string, 1, n))
data <- data |> mutate(sub_string = substr(string, nchar(string)-n+1, nchar(string)))
data <- data |> mutate(num = sprintf("%02d", num)) #passer de 1 à 2 digits
data <- data |> mutate(num = str_pad(num, 14, pad = "0")) #obliger à avoir 14 caractères (donc ajoute 0 en début si besoin)
nchar(string) == 1
str_detect(string, "[0-9]") == TRUE
grep("\\d+", string, value = TRUE)
grepl('[^[:alnum:]]', string)
grepl("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", string)
str_extract(string, "^\\D+")
str_extract(string, "^[a-zA-Z0-9_]*")
str_extract(string, "(?<=: )[^\n]*")
str_extract(string, "(?<=\\[).*")
substr(Question, 1, nchar(Question)-1) #remove last character
str_extract("string bla [da]", "^.*\\[") #crochet inclu dans l'extraction
str_extract(string, "^[^(]+")
str_extract(string, "\\(.*?\\)")
str_extract(string, "\\s+(.*)")
sapply(str_extract_all("+3214FSEtest!! 1", "[\\d+]+"), function(x) paste(x,collapse=""))
# Reformater code postal mal renseigné (avec des espaces entre les 5 digits)
# Exemple : "49, avenue Du grau du rieu Marseillan Occitanie 34 340 France"
mutate(adresses = str_replace_all(adresses, "\\b(\\d{2}) (\\d{3})\\b", "\\1\\2"))
data <- data |>
pull(column) |> pluck() |> bind_rows() |>
group_by(author_id) |> mutate(n = n()) |> select(author_id, n) |> distinct()
# Exemple
comm1 <- data |>
filter(lengths(comments) != 0) |>
group_by(id) |>
mutate(nb_comments = nrow(comments[[1]])) |>
select(id2, id, comments, nb_comments) |>
pluck() |> bind_rows()
comm2 <- comm1 |>
pull(comments) |>
pluck() |> bind_rows()
comm1.1 <- comm1 |> ungroup() |>
mutate(index = row_number()) |>
group_by(id) |>
slice(rep(1:n(), each = nb_comments)) |>
arrange(index)
proj_comm_date <- cbind(comm1.1, comm2) |> ungroup() |>
select(id2, author_id, publishedAt) |>
rename(date = publishedAt) |> mutate(type = "commentaires")
data <- data |> filter(!grepl(',', column)) #containing comma
data <- data |> filter(grepl("mot particulier", column) == TRUE)
data <- data |> filter_all(all_vars(grepl("mot", .)))
data |> filter(row_number() %% 2 == 0) # pair
data |> filter(row_number() %% 2 == 1) # impair
data |> group_by(cat) |> filter(across(where(is.character), ~. != "N/A"))
data |>
filter(any(projet %in% c("proj1234", "proj4321")),
.by = id)
data |>
filter((degre_etude == min(degre_etude) | degre_etude == min(degre_etude[degre_etude != min(degre_etude)])) |
(degre_etude == min(degre_etude) | is.na(degre_etude)),
.by = id)
m3 <- data |> select(c(BATIMENTS:TYPE_DE_BATIMENTS, starts_with("m3"))) |>
mutate_all(as.character) |>
pivot_longer(cols = -c(BATIMENTS:TYPE_DE_BATIMENTS), names_to = "Annee", values_to = "m3", names_prefix = "m3_")
montant <- data |> select(c(BATIMENTS:TYPE_DE_BATIMENTS, starts_with("montant"))) |>
mutate_all(as.character) |>
pivot_longer(cols = -c(BATIMENTS:TYPE_DE_BATIMENTS), names_to = "Annee", values_to = "montant", names_prefix = "montant_")
final <- cbind(m3, montant |> select(montant))
data <- data |>
pivot_wider(names_from = choix, values_from = nb_interesses, names_prefix = "choix_")
# Split
data <- data |> mutate(journal_issns = strsplit(as.character(journal_issns), ",")) |> unnest(journal_issns)
# Unsplit
data <- data |> mutate(journal_issns = paste0(unique(na.omit(journal_issns)), collapse = ","))
data |> uncount(x)
data.frame(col1 = c(0, 167, 73),
col2 = c(62, 0, 73)) |>
slice(rep(1:n(), times = c(col2[1], col1[2], col2[3])))
random <- c("groupe 1", "groupe 2", "groupe 3")
sample(random, size = nrow(data), replace = TRUE, prob = c(1/2,3/5,2/5))
data <- data |> mutate(new_col = coalesce(col1,col2,col3))
library(fuzzyjoin)
data <- stringdist_left_join(data, data2, by = "col_name", max_dist = 5, distance_col = "distance") |>
group_by(nom) |> slice_min(distance)
rbind()
data_merged <- merge(df_1, df_2, all = TRUE)
rbind()
when different number of
columnsdata <- list(cat1, cat2) |> bind_rows(.id = 'origine_df')
anti_join(df1, df2)
semi_join(df1, df2)
identical(data$id1, data$id2)
library(unix) #pour linux
rlimit_as(1e20) #increases to ~12GB
# Une colonne
data |> count(is.na(col_name))
# Toute les colonnes
nb_NA <- as.data.frame(apply(is.na(data), 2, sum)) |>
rename(`nombre de NA` = `apply(is.na(data), 2, sum)`) |>
mutate(pourcentage = `nombre de NA`/nrow(data)*100) |>
mutate(pourcentage = round(pourcentage, 2)) |>
arrange(desc(pourcentage)) |>
rownames_to_column() |>
rename(variable = rowname)
table <- data |>
summarise_all(list(~sum(!is.na(.))), .by = group)
data <- as.data.frame(table(data$column)) #R base
data <- data |> group_by(group) |> count(column) #dplyr
data <- data |> summarise(n = n(), .by = group) #dplyr
data |> mutate(new_cat = sum(n[Catégorie2 == "Total"])) #[]
data |> rowwise() |> mutate(sum_multiple = sum(c_across(var_3:var_17)))
data |> group_by(Structure) |> mutate(ecart = Pourcentage - lag(Pourcentage))
# Fonction pour calculer une médiane pondérée
weighted_median <- function(x, w) {
df <- data.frame(x = x, w = w) |>
arrange(x)
cum_w <- cumsum(df$w)
cut_point <- sum(df$w) / 2
median <- df$x[which(cum_w >= cut_point)[1]]
return(median)
}
# Calculs pondérés
library(stats)
library(questionr)
data |> mutate(n_moyen_stats = round(weighted.mean(n_repondants, PONDFIN_logit), 2), #package stats
n_moyen_questionr = round(questionr::wtd.mean(n_repondants, PONDFIN_logit), 2), #package questionr
n_moyen_manuel = round(sum(n_repondants * PONDFIN_logit) / sum(PONDFIN_logit), 2), #calcul manuel
n_median_pondere = weighted_median(n_repondants, PONDFIN_logit)) #médiane
# geom_bar ordre alphabetic, après arrange()
mutate(colonne = factor(colonne, levels = rev(unique(colonne))))
# geom_bar décroissant selon n
mutate(colonne = fct_reorder(colonne, n))
# Afficher plusieurs ggplots
library(gridExtra)
grid.arrange(g1,g2,g3, ncol = 3, nrow = 1,
top = grid::textGrob("Titre", gp = grid::gpar(fontsize = 15, font = 2)))
# Aligner les boxes
library(cowplot)
plot_grid(p3.1, p3.2, p3.3, p3.4, p3.5, align = 'vh')
# Afficher plusieurs ggplotlys
library(plotly)
subplot(plotly_positif, plotly_negatif, nrows = 1)
# Pas de message grid.arrange() dans rmd
graph <- grid.arrange(g1,g2)
grid::grid.draw(graph)
# Passer en plotly
ggplotly(graph, tooltip = c("text")) |>
layout(xaxis = list(autorange = TRUE), yaxis = list(autorange = TRUE)) #auto adjust scale when click on element
# Passer en giraph : mettre les geométries en interactif !! ex: geom_segment_interactive()
graphc <- ggplot(data, aes(x = Réponses, y = Pourcentage, fill = Edition,
tooltip = paste0(Pourcentage*100, "% en ", Edition))) +#texte au survol
geom_point_interactive() +
theme_minimal()
girafe(print(graph), width_svg = 15, height_svg = 12)
data |>
ggplot(aes(y = n)) +
geom_col(aes(x = cycle, fill = rowname, alpha = cycle != "Catégorie"), color = "white", position = "stack", width = 0.7) +
scale_alpha_manual(values=c(1, .4))
font <- "Helvetica"
custom_theme <- function (){
font <- "Helvetica"
ggplot2::theme(plot.title = ggplot2::element_text(family = font,size = 21, face = "bold", color = "#222222"),
plot.subtitle = ggplot2::element_text(family = font,size = 18, face = "italic", margin = ggplot2::margin(0, 0, 9, 0)),
plot.caption = ggplot2::element_text(family = font,size = 18, margin = ggplot2::margin(9, 0, 9, 0)),
plot.title.position = "plot",
plot.caption.position = "plot",
legend.title = ggplot2::element_text(family = font, size = 18, color = "#222222"),
legend.position = "top",
legend.text.align = 0,
legend.background = ggplot2::element_blank(),
legend.key = ggplot2::element_blank(),
legend.text = ggplot2::element_text(family = font, size = 18,color = "#222222"),
axis.text = ggplot2::element_text(family = font, size = 15,color = "#222222"),
axis.text.x = ggplot2::element_text(margin = ggplot2::margin(5,b = 10)),
axis.title = ggplot2::element_text(family = font, size = 18,color = "#222222"),
axis.ticks = ggplot2::element_blank(),
axis.line = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"),
panel.grid.major.x = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
strip.background = ggplot2::element_rect(fill = "white"),
strip.text = ggplot2::element_text(size = 22, hjust = 0, face = "bold"))
}
custom_theme_largeG <- function (){
font <- "Helvetica"
ggplot2::theme(plot.title = ggplot2::element_text(family = font,size = 25, face = "bold", color = "#222222"),
plot.subtitle = ggplot2::element_text(family = font,size = 18, face = "italic", margin = ggplot2::margin(0, 0, 9, 0)),
plot.caption = ggplot2::element_text(family = font,size = 18, margin = ggplot2::margin(9, 0, 9, 0)),
plot.title.position = "plot", #titre commence où y-axis commencent !!
plot.caption.position = "plot",
legend.title = ggplot2::element_text(family = font, size = 18, color = "#222222"),
legend.position = "top",
legend.text.align = 0,
legend.background = ggplot2::element_blank(),
legend.key = ggplot2::element_blank(),
legend.text = ggplot2::element_text(family = font, size = 18,color = "#222222"),
axis.text = ggplot2::element_text(family = font, size = 24,color = "#222222"),
axis.text.x = ggplot2::element_text(margin = ggplot2::margin(5,b = 10)),
axis.title = ggplot2::element_text(family = font, size = 27,color = "#222222"),
axis.ticks = ggplot2::element_blank(),
axis.line = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"),
panel.grid.major.x = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
strip.background = ggplot2::element_rect(fill = "white"),
strip.text = ggplot2::element_text(size = 22, hjust = 0, face = "bold"))
}
# change grid when coord_flip()
theme(panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
panel.grid.major.y = ggplot2::element_blank())
# Geométrie initiale
geom_line(size = 1.7, alpha = 0.9, linetype = 1, color = "#0066CC") +
geom_point(colour = "#0066CC", fill = "#0066CC", size = 2, pch = 21, stroke = 1.5) +
geom_bar(position = position_dodge(.9), stat = "identity", width = .8, fill = "#21468d") + #.9 et width pour barres pas collées
geom_bar(aes(x = forcats::fct_infreq(adequation))) + #fct_infreq pour ordonner selon count
geom_col(position = "stack", width = 0.7, color = "white") + coord_flip() + #cas particulier de geom_bar où on prend n comme Y et non count
geom_text_wordcloud(family = "Montserrat") +
# Géométrie additionnelle
geom_text(aes(y = 1, label = title_projet), hjust = "bottom", #aligner geom_text à gauche avec coord_flip
fontface = "italic", size = 5, hjust = 0) +
geom_label(aes(y = 1, label = title_projet), hjust = "bottom", fontface = "italic", size = 2.6,
fill = "white", label.size = NA, hjust = 0) + #white background and remove black borders à la fin sinon marche pas !
geom_label(aes(y = 1, label = title_projet), hjust = "bottom", fontface = "italic", size = 2.6,
fill = "white", label.size = NA, position = position_dodge(width = .9), hjust = 0) + #pour double barres plot (dodge)
stat_count(geom = "text", colour = "white", size = 4,
aes(label = ..count.., y = ..count..+.7), #y pour positionnement juste au dessus des barres
position = position_stack(vjust = 0.5)) + #geom_text des geom_bar sans y
geom_vline(xintercept = -.5, linetype = 2, color = "#0066CC") +
# Annotate a graph
coord_fixed(clip = 'off') # geom_label() déborde du graph
xlim(1, 100) +
scale_y_continuous(labels = scales::comma) + #grands chiffres lisibles
scale_y_continuous(breaks = scales::pretty_breaks()) + #breaks réguliers, plus lisible (pas d'axe)
scale_y_continuous(labels = scales::percent, limits = c(0,1)) + # pourcentages
scale_y_discrete(limits = 1:12) + #valeurs discrètes
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) + #axis-text trop longs sur plusieurs lignes
scale_color_continuous(high = "#132B43", low = "#56B1F7") #reverse color
scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10), limits = c(-.2, 10)) # de 0 à 10 avec les breaks spécifiés
scale_fill_manual(values = c("Non, mais c’est prévu dans les 12 prochains mois" = "#e1b44d",
"Oui, au cours des deux dernières années" = "#323465",
"Oui" = "#33bbc9"),
labels = c("Non, mais c’est prévu dans les 12 prochains mois" = "Prévue dans les 12 prochains mois",
"Oui, au cours des deux dernières années" = "Engagée au cours des deux dernières années",
"Oui" = "Engagée"), #rename categories legend
breaks = c("Oui", "Oui, au cours des deux dernières années", "Non, mais c’est prévu dans les 12 prochains mois")) #order items legend
# titres trop longs, automatiquement coupés
title = stringr::str_wrap("Exemple de titre très très très très très très très très très très très très long", width = 45)
# titres avec des mots colorés
library(ggtext)
ggplot(data) +
geom_point() +
labs(title = "Dans mon titre je veux mettre en avant <span style='color: #323465; font-size: 23pt;'>cette catégorie</span> par rapport aux autres") +
theme(plot.title = element_markdown()) #coloré et taille plus grande
theme_classic() +
theme(panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"), #lignes horizontales fond graphique en gris (BBC thème)
strip.text.x = element_text(face = "bold"), #label des facettes
axis.title.x = element_text(margin = margin(t = 5, r = 0, b = 5, l = 0)), #augmenter marges entre texte et labels des axes
plot.title = element_textbox_simple(hjust = 1), #hjust: titre aligné à droite, element_textbox_simple line break auto titre
plot.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"), #background couleur Datactivist
panel.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"),
legend.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"),
legend.box = "vertical", legend.box.just = "left", #multiple guide_legend each one on new row, for top legend
) +
facet_grid(Projet ~ .,
scales = "free", #scales = "free" pour label différents d'une facette à une autre
space = "free") + #space = "free" pour hauteur différentes selon le nombre d'éléments par facette
facet_zoom(x = annee > 2014, split = TRUE) +
ggforce::facet_col(facets = vars(Projet),
scales = "free_y",
space = "free") # pour avoir scales et face de facet_grid avec labels to the top de facet_wrap
facet_wrap(~vote, scales='free_x') + scale_y_continuous(limits=c(0 ,50)) # pour avoir les ticks sur chaque facettes et pas juste celles du bas
guides(fill = guide_legend(nrow = 6, byrow = TRUE, # nombre d'éléments par ligne
title = "titre légende"), # titre légende
lwd = "none", #ne pas afficher une légende en particulier
col = guide_legend(title = "", reverse = TRUE, override.aes = list(lwd = 2))) + #lwd = 2 pour ligne plus épaisse et plus visible dans la légende
# Deux légendes sur un même graph, affichées l'une sous l'autre
theme(legend.box = "vertical", legend.box.just = "left")
scale_fill_manual(values = c("#c898ae", "#da4729", "#f38337", "#74a466", "#fecf5d", "#5E79AC")) #couleurs Bauhaus
iris |>
group_by(species) |> summarise(Freq = n()) |>
mutate(fraction = Freq / sum(Freq),
proportion = round((Freq / sum(Freq))*100),
ymax = cumsum(fraction),
ymin = c(0, head(ymax, n = -1)),
labelPosition = (ymax + ymin) / 2) |>
ggplot(aes(ymax = ymax, ymin = ymin, xmax = 3.3, xmin = 2, fill = species)) +
geom_rect(col = "white", linewidth = 2) +
geom_text(x = 4, aes(y = labelPosition, label = paste(proportion,"%",sep = "")), color = "#333333", size = 7) +
geom_text(aes(x = 0, y = 0, label = sum(Freq)), col = "#333333", alpha = 0.8, size = 14, fontface = "bold", inherit.aes = FALSE) +
scale_fill_manual(values = c('#fecf5d', '#21468d','#82888d')) +
coord_polar(theta = "y") +
labs(title = "Titre") +
xlim(c(0, 4)) +
theme_void() +
theme(legend.position = "right",
plot.title = element_text(face = "bold"))+
guides(fill = guide_legend(title = ""))
iris |>
ggplot() +
aes(x = sepal_length) +
geom_histogram(bins = 7L, col = "white", fill = "#21468d", width = 5) +
geom_vline(xintercept = mean(iris$sepal_length, na.rm = TRUE), linetype = 2, col = "red") +
geom_text(aes(x = mean(sepal_length, na.rm = T), y = 1, label = paste("Moyenne :", round(mean(sepal_length, na.rm = T), 1))),
col = "red", fontface = "italic", hjust = 0, size = 5) +
labs(x = "Valeur",y = "Fréquence", title = "Titre",
subtitle = paste(iris |> filter(is.na(sepal_length)) |> nrow(), "valeur manquante")) +
theme_classic() +
theme(plot.subtitle = element_text(face = "italic"),
plot.title = element_text(face = "bold")) +
scale_x_continuous(n.breaks = 10)
iris |>
#select(1, 37:40) |>
reshape2::melt(id.vars = c("species")) |>
ggplot(aes(x = value)) +
geom_histogram(bins = 10L, col = "white", fill = "#21468d", width = 5) +
labs(x = "Valeur", y = "Fréquence", title = "Titre") +
theme_classic() +
theme(plot.title = element_text(face = "bold")) +
facet_wrap(variable ~ .)
ggplot(data = table_femmes) +
geom_histogram(mapping = aes(x = percent, y=..density..), fill = "#fd710f", color = "white", alpha = .7) +
stat_function(fun = dnorm, args = list(mean = mean(table_femmes$percent), sd = sd(table_femmes$percent)),
size = 1, alpha = .8, aes(col = "Distribution normale")) +
scale_colour_manual("", values = c("#cb1d27")) +
labs(title = "Distribution de la part des femmes dans les cohortes", x = "Pourcentage de femmes", y = "Densité") +
theme_classic() +
scale_x_continuous(labels = scales::percent, limits = c(0,1)) +
geom_vline(xintercept = .5, linetype = 2, col = "#666666", linewidth = .7) +
custom_theme() +
theme(plot.title = ggplot2::element_text(size = 18))
library(lemon)
iris |>
filter(species != "setosa") |>
mutate(tranche_pw = cut(petal_width, c(0.1, 0.5, seq(1, 2.5, .5)))) |>
group_by(tranche_pw, species) |>
summarise(Freq = n()) |> ungroup() |> group_by(species) |>
mutate(percent = Freq / sum(Freq) *100,
percent = ifelse(percent < 0.5, round(percent, 1), round(percent, 0))) |>
na.omit() |>
ungroup() |>
ggplot(mapping = aes(x = ifelse(species == "versicolor", -Freq, Freq), y = tranche_pw, fill = species)) +
geom_col(col = "white", size = 1.3) + #col = "black"
geom_text(aes(y = tranche_pw, x = ifelse(species == "versicolor", -Freq-1, Freq+1), label = paste(percent,"%",sep = "")),
color = "#333333", size = 3, check_overlap = T) +
scale_x_symmetric(labels = abs) +
scale_colour_manual(values = c('#fecf5d', '#21468d'),
aesthetics = c("colour", "fill")) +
labs(x = "Fréquence", y = "", title = "Titre") +
theme_classic() +
theme(legend.position = "right",
plot.title = element_text(face = "bold"),
axis.title.y = element_blank(),
axis.line.y = element_blank(),
axis.ticks.y = element_blank()) +
guides(fill = guide_legend(title = ""))
iris |>
group_by(species) |>
summarise(n = n()) |> ungroup() |> na.omit() |>
mutate(percent = round((n / sum(n))*100, 0)) |>
mutate(species = fct_reorder(species, n)) |>
ggplot(aes(x = species, y = n))+
geom_bar(stat = "identity", width = .6, fill = "#21468d") +
geom_text(aes(y = n+3, x = species, label = paste(percent,"%",sep = "")),
color = "#333333", size = 3, check_overlap = T) +
coord_flip() +
labs(x = "", y = "Fréquence", title = "Titre") +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 70)) + #axis-text trop longs sur plusieurs lignes
theme_classic() +
theme(plot.title = element_text(face = "bold"),
axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)))
devtools::install_github("ricardo-bion/ggradar") #installation impossible
library(ggradar)
iris |>
select(sepal_length, sepal_length:petal_width) |>
summarise_all(funs(mean(., na.rm = T))) |>
ggradar(values.radar = c("1", "3", "5"),
grid.min = 1, grid.mid = 3, grid.max = 5,
# Polygones
group.line.width = 1,
group.point.size = 3,
group.colours = "#21468d",
# Arrière-plan et lignes de grille
background.circle.colour = "white",
gridline.mid.colour = "grey") +
xlim(-10,10) + #selon longueur catégories
labs(title = "Titre") +
theme(legend.position = "none",
plot.title = element_text(face = "bold"))
comp_data <- data.frame("variable" = c("sepal_length", "sepal_width", "petal_length", "petal_width"),
"Minimum" = c(min(iris$sepal_length), min(iris$sepal_width),
min(iris$petal_length), min(iris$petal_width)),
"Maximum" = c(max(iris$sepal_length), max(iris$sepal_width),
max(iris$petal_length), max(iris$petal_width)),
"Moyenne" = c(mean(iris$sepal_length), mean(iris$sepal_width),
mean(iris$petal_length), mean(iris$petal_width)))
comp_data |>
arrange(variable) |>
mutate(variable = factor(variable, levels = rev(unique(variable)))) |>
ggplot() +
geom_segment(aes(x = Minimum, xend = Maximum, y = variable, yend = variable, colour = variable), lwd = 10) +
geom_segment(aes(x = Moyenne, xend = Moyenne+.05, y = variable, yend = variable),
colour = ifelse(comp_data$variable == "sepal_length", "white", "black"), lwd = 10) +
geom_text(aes(x = Moyenne+.2, y = variable, label = paste("Moyenne :", round(Moyenne, 1)), hjust = "bottom"),
size = 3, col = ifelse(comp_data$variable == "sepal_length", "white", "#333333")) +
labs(title = "Titre", x = "Score", y = "") +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) +
scale_color_manual(values = c("petal_length" = "#21468d",
"sepal_width" = "#fecf5d",
"sepal_length" = "#d55152",
"petal_width" = "#ddd6ad")) +
theme_bw() +
theme(legend.position = "none",
plot.title = element_text(face = "bold"))
library(icons)
library(tidyverse)
# Table
df <- data.frame(
x = c(2, 8.5, 15, 21.5),
y = rep(6.5, 4),
h = rep(4.25, 4),
w = rep(6.25, 4),
value = c(5, 7, 17, 5),
info = c("Communes",
"Participants",
"Jeux de données \nouverts",
"Réutilisations"),
color = factor(1:4)
)
# Graphique
ggplot(df, aes(x, y, height = h, width = w, label = info)) +
## Create the tiles using the `color` column
geom_tile(aes(fill = color)) +
## Add the numeric values as text in `value` column
geom_text(color = "white", fontface = "bold", size = 16,
aes(label = ifelse(value > 999, format(as.integer(value, 0), nsmall = 1, big.mark = "."), value),
x = x, y = y + 0.7),
hjust = 0.5) +
## Add the labels for each box stored in the `info` column
geom_text(color = "white", fontface = "bold", size = 10,
aes(label = info, x = x - 2.9, y = y - 1), hjust = 0) +
coord_fixed() +
#scale_fill_manual(type = "qual", palette = "Dark2") +
scale_fill_manual(values = c("#9bcea4", "#ef7875", "#ffcc00", "#23ae84", "#fecf5d", "#21468d")) +
## Use `geom_text()` to add the icons by specifying the unicode symbol.
theme_void() +
guides(fill = FALSE)
# couleurs
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
bgcolors <- matrix("black", nrow(matrix), ncol(matrix),dimnames = dimnames(matrix))
bgcolors[,2] <- "red"
bgcolors <- bgcolors[lower.tri(bgcolors, diag=TRUE)]
# matrice
ggcorrplot(matrix, hc.order = T, type = "lower", show.diag = TRUE, legend.title = "",
lab = TRUE, lab_col = bgcolors, colors = c("#BB4444", "white", "#4477AA")) +
labs(title = stringr::str_wrap("Décorrelation entre la phase <span style='color: red;'>Analyse, traitement, calcul</span> et les autres phases du cycle de vie", width = 50),
subtitle = "Les phases de dépôt et de partage fortement connectées entre elles",
caption = "Sur quelle(s) partie(s) du cycle de vie des données intervenez-vous ? - 5642 réponses") +
theme(axis.text.y = element_blank(),
panel.grid = element_blank(),
plot.subtitle = element_markdown(size = 15),
plot.title = element_markdown(size = 18, lineheight = 2),
plot.caption = element_text(size = 15, face = "italic", hjust = 0, color = "#cbcbcb")) +
geom_label(aes(x = 6, y = 7), label = "Dépot", family = font, vjust = .4, hjust = "right",
fill = "white", label.size = NA) +
geom_label(aes(x = 5, y = 6), label = "Préparation du dépôt", family = font, vjust = .4, hjust = "right",
fill = "white", label.size = NA) +
geom_label(aes(x = 4, y = 5), label = "Ouverture ou partage", family = font, vjust = .4, hjust = "right",
fill = "white", label.size = NA) +
geom_label(aes(x = 3, y = 4), label = "Planification, PGD", family = font, vjust = .4, hjust = "right",
fill = "white", label.size = NA) +
geom_label(aes(x = 2, y = 3), label = "Réutilisation", family = font, vjust = .4, hjust = "right",
fill = "white", label.size = NA) +
geom_label(aes(x = 1, y = 2), label = "Analyse, traitement, calcul", family = font, vjust = .4, hjust = "right",
fill = "white", label.size = NA) +
geom_label(aes(x = 0, y = 1), label = "Collecte, création, stockage", family = font, vjust = .4, hjust = "right",
fill = "white", label.size = NA) +
coord_fixed(clip = 'off')
test <- table |>
mutate(ecart = ecart/100,
type_ecart = case_when(ecart >= 0 ~ "positif",
ecart < 0 ~ "negatif",
.default = NA_character_)) |>
select(Structure, Réponses, ecart, type_ecart) |> na.omit()
test |>
ggplot(aes(x = Réponses, y = ecart, fill = type_ecart)) +
geom_bar(stat="identity", width=.6, size = 2, alpha = .7) +
coord_flip() +
labs(x = "", y = "Écart en points de pourcentage",
subtitle = "Évolution des réponses entre l'édition 2022 et l'édition 2023",
title = stringr::str_wrap("Commençons par la protection de la vie privée. Considérez-vous que votre structure est en conformité avec le RGPD ?", width = 55)) +
scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 40)) +
scale_y_continuous(labels = scales::percent) + # pourcentages
scale_fill_manual(values = c("positif" = "#288a28", "negatif" = "#a81008")) +
theme_classic() +
custom_theme() +
theme(legend.position = "none",
panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
panel.grid.major.y = ggplot2::element_blank(),
plot.subtitle = element_markdown()) +
geom_label(aes(x = Réponses, y = ecart, label = paste0(ecart*100, "%")),
color = "#333333", size = 4, fill = "white", label.size = NA,
hjust = ifelse(test$ecart >= 0, 0, 1))
# Graph
# titre
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(0%,-170%);
left: 7%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
font-weight: bold;
font-size: 22px;
color: black;
}
"))
title <- tags$div(tag.map.title, HTML("Localisation des participants aux projets (hors ZFE)"))
# carte
proj_loin |>
leaflet() |>
addTiles() |>
addControl(title, position = "topleft", className = "map-title") |>
setView(lng = 3, lat = 47, zoom = 4.8) |>
addCircles(radius = proj_loin$nb_contrib, lng = proj_loin$longitude, lat = proj_loin$latitude, #color = proj_loin$col,
weight = 1, opacity = proj_loin$nb_contrib, fillOpacity = .1,
label = proj_loin$nom_commune,
popup = paste0(proj_loin$nom_commune, ", ", format(as.integer(proj_loin$nb_contrib, 0), nsmall = 1, big.mark = "."),
ifelse(proj_loin$nb_contrib <= 1, " contribution dont :\n", " contributions dont :\n"),
"<br>• ", format(as.integer(proj_loin$questionnaires, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$questionnaires <= 1, " questionnaire", " questionnaires"),
"<br>• ", format(as.integer(proj_loin$propositions, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$propositions <= 1, " proposition", " propositions"),
"<br>• ", format(as.integer(proj_loin$commentaires, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$commentaires <= 1, " commentaire", " commentaires"),
"<br>• ", format(as.integer(proj_loin$votes, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$votes <= 1, " vote", " votes"))) |>
addProviderTiles(provider = "Esri.WorldGrayCanvas")
# all providers : http://leaflet-extras.github.io/leaflet-providers/preview/index.html
# Carte avec les points qui se précisent au fur et à mesure du zoom
test |>
leaflet() |>
addTiles() |>
setView(lng = 3, lat = 47, zoom = 4.8) |>
addMarkers(lng = test$long, lat = test$lat,
label = test$Concessionnaires_Dénominationsociale,
clusterOptions = markerClusterOptions()) |> #clusterOptions fait cet effet
addProviderTiles(provider = "Esri.WorldGrayCanvas")
# Légende
graph_leg <- table_all |>
filter(Structure == "Une commune" | Structure == "Un EPCI" | Structure == "Un département" |
Structure == "Une région" | Structure == "Autre") |>
mutate(Structure = factor(Structure, levels = c("Autre","Une région","Un département","Un EPCI","Une commune"))) |>
ggplot(aes(x = Structure, y = Pourcentage, fill = Réponses,
text = paste0(Pourcentage*100, "% de réponses '", Réponses, "'"))) +
geom_bar(position = "fill", color = "white", stat="identity", alpha = .9) +
labs(x = "", y = "Pourcentage de réponses", title = "Par type de collectivité") +
scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
scale_fill_manual(values = c("grey", "#bf0001", "#e1b44d", "#33bbc9", "#323465", "#616165", "#0a0a14")) +
facet_wrap(~Edition) +
coord_flip() +
theme_classic() +
custom_theme() +
guides(fill = guide_legend(ncol = 1, title = "", reverse = T)) +
theme(legend.position = "top",
panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
panel.grid.major.y = ggplot2::element_blank())
legend <- get_legend(graph_leg + theme(legend.box.margin = margin(0, 0, 10, 12)))
table |>
mutate(timestamp = as.Date(as.character(paste0(timestamp, "-01")))) |>
ggplot(aes(x = timestamp
#text = paste(n_cumule, "inscrits en", format(timestamp, "%b %Y"))
)) +
geom_line( aes(y=n_cumule), size=1, alpha=0.9, color = "#3366CC") +
geom_line( aes(y=n_cumule_contrib/3.785737), size=1, alpha=0.9, color = "#CC0000") +
labs(x = "Temps", y = "Nombre d'inscrits",
title = stringr::str_wrap("Évolution du nombre d'inscrits et du nombre de contributions cumulés sur la plateforme depuis son lancement", width = 40)) +
scale_y_continuous(name = "Nombre total d'inscrits",
sec.axis = sec_axis(~ . * 3.785737, name = "Nombre total de contributions")) + #scale_x_date(date_labels = "%Y %b") +
theme_classic() +
custom_theme() +
theme(legend.position = "right",
axis.title.y = element_text(color = "#3366CC"),
axis.title.y.right = element_text(color = "#CC0000"))
library(treemap)
library(treemapify)
table |>
filter(row_number() <= 10) |>
ggplot() +
geom_treemap(aes(area = effectif_couple_spe, fill = couple_spe), col = "white", size = 4) +
geom_treemap_text(aes(area = effectif_couple_spe, fill = couple_spe,
label = paste0(couple_spe, "\n(", percent, "%)")),
colour = "white", place = "centre", size = 15, grow = TRUE) +
scale_fill_manual(values = c("#345E68", "#FEDEA0", "#B7C2A5", "#023743","#7A9BB1", "#B8AA75", "#7B8598", "#345B48", "#476F84", "#D0BA7C")) +
labs(title = "Les 10 principales spécialités au BAC choisies par les étudiants d'Ile-de-France") +
custom_theme() +
theme(legend.position = "none")
gttable()
library(gt)
library(gtExtras)
data |>
gt() |>
gt_theme_nytimes() |>
tab_header(title = "Mon titre") |>
tab_style(style = list(cell_text(weight = "bold")), #en gras
locations = cells_body(columns = ma_col)) |>
cols_align(align = "center", columns = everything()) |>
cols_align(align = "left", columns = c(col1, col2)) |>
gt_plt_bar_pct(`% temps`, scaled = TRUE, labels=TRUE, decimals = 0, #attention à remplacer les NA par 0 sinon erreur
font_size = "14px", fill = "#173541", height = 20)
DT::datatable()
datatable(data, options = list(pageLength = 5, scrollX = TRUE))
knitr::kable()
knitr::kable(stat_indiv, format = "html") |>
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
reactable()
library(reactable)
# Fonction pour colorer les catégories
category_styles <- function(Catégorie) {
case_when(
Catégorie == "Faible" ~ list(background = "#e3f2fd", color = "black"),
Catégorie == "Moyen" ~ list(background = "#b5dbfb", color = "black"),
Catégorie == "Élevé" ~ list(background = "#1d82df", color = "white"),
Catégorie == "Très élevé" ~ list(background = "#0d47a1", color = "white"),
Catégorie == "NA" ~ list(background = "#CCCCCC", color = "black"),
TRUE ~ list(background = NA, color = NA)
)
}
# Affichage de la table
table |>
reactable(columns = list(Catégorie = colDef(style = function(value){category_styles(value)})))
# Indicateurs en une table
table <- data.frame(
Nom = c("nom1", "nom3", "nom3"),
Score = c(85, 92, 78),
Age = c(25, 30, 22),
Height = c(160, 175, 168)
)
# Fonction pour colorer les indicateurs
indicateur_styles <- function(Indicateur) {
# calcul des statistiques
q1 <- quantile(Indicateur, 0.25, na.rm = TRUE)
med <- median(Indicateur, na.rm = TRUE)
q3 <- quantile(Indicateur, 0.75, na.rm = TRUE)
# conditions de mise en forme
sapply(Indicateur, function(value) {
dplyr::case_when(
value <= q1 ~ "background-color: #e3f2fd; color: black;",
value > q1 & value <= med ~ "background-color: #b5dbfb; color: black;",
value > med & value <= q3 ~ "background-color: #1d82df; color: white;",
value > q3 ~ "background-color: #0d47a1; color: white;",
TRUE ~ "background-color: #CCCCCC; color: black;"
)
})
}
# Liste des colonnes sur lesquelles appliquer le style
column_defs <- setNames(
lapply(names(table), function(col) {
colDef(
style = if (col != "Nom") {
function(value, index, name) {
indicateur_styles(table[[col]])[index]
}
} else {
NULL
}
)
}),
names(table)
)
# Affichage de la table
table |>
reactable(columns = column_defs, defaultPageSize = 5)
# Fixer la première colonne
data |> reactable(columns = list(ID = colDef(sticky = "left")))
# Fonction pour fixer la première colonne en plus d'appliquer aux autres colonnes la mise en forme des couleurs
column_defs <- setNames(
lapply(names(table), function(col) {
if (col != "ID") {
colDef(
style = function(value, index, name) {
indicateur_styles(table[[col]])[index]
}
)
} else {
colDef(
sticky = "left"
)
}
}),
names(table)
)
# Export de la table
library(htmlwidgets)
library(webshot2)
saveWidget(widget = table_rea, file = "../mon_path/data.html", selfcontained = TRUE)
webshot2::webshot("../mon_path/data.html", "../mon_path/data.jpg",
vwidth = 1200, vheight = 600)
rio::export(data, "~/Downloads/tableau.csv")
write.csv(data, "~/Downloads/tableau.csv", row.names = FALSE, fileEncoding = "UTF-8")
saving_plot <- function(graph, name, width, height) {
ggsave(file = glue("~/Downloads/SVG/{name}.svg"), plot = graph, width = width, height = height)
ggsave(file = glue("~/Downloads/PNG/{name}.png"), plot = graph, width = width, height = height)
}
saving_plot(graph, "histogram", 9, 5)
library(htmlwidgets)
saveWidget(map, file = "ma_carte.html")
includes:
in_header: !expr here::here("inst/rmarkdown/resources/header.html")
# Logo haut de page
htmltools::img(src = "lien/vers/mon/image",
alt = 'logo',
style = 'position:absolute; top:0; right:0; width:400px') #width pour la taille! (ici positionné en haut à droite)
# Mettre dans le header du document RMD
knit: (
function(inputFile, encoding) {
rmarkdown::render(inputFile, params = "ask",
encoding = encoding,
output_dir = "../reports",
output_file = paste0(tools::file_path_sans_ext(inputFile), ".html")) })
# Mettre en corps de texte du RMD où dans un fichier CSS à part
<style>
body {
text-align: justify
}
</style>
print(dfSummary(data_summary, style = "grid", graph.magnif = 1,
valid.col = FALSE, varnumbers = FALSE, tmp.img.dir = "/tmp",
max.distinct.values = 5, headings = FALSE, method = "render",
col.widths = c(300, 200, 100, 50, 20)),
max.tbl.height = 600,
method = "render")
library(showtext)
font_add("Nexa", regular = "Nexa Bold.otf")
font_add("Trade Gothic", regular = "Trade Gothic.otf")
showtext_auto()
# then specify on the CSS the name of the font
output:
rmarkdown::html_document:
toc: true
toc_float: true
toc_depth: 2
number_sections: true
# {-} après certains titres si on veut enlever le numérotage automatique pour ceux-là
# En début de Rmd
library(reticulate)
# Première utilisation
py_install("pandas") #pandas
pip install plotly #plotly, à runer dans le terminal
# Pour utiliser un environnement virtuel
Sys.setenv(RETICULATE_PYTHON = "path/to/python.exe")
virtualenv_create("test_proj")
py_install("pandas", envname = "test_proj", method = "auto")
use_virtualenv("test_proj")
#```{python}
import pandas
import plotly.express as px
matrice = [[43, 57], [12, 88]]
fig = px.imshow(matrice)
fig.show()
#```
<div class = "tocify-extend-page" data-unique = "tocify-extend-page" style = "height: 0;"></div>
# Déposer Rmd sur GDrive pour travailler en collaboration
# LE METTRE DANS UN DOSSIER TRACKDOWN ET LE NOM EN LIGNE DOIT GARDER L'EXTENSION .RMD
trackdown::upload_file(file = "scripts/Rapport_final.Rmd", gfile = "Rapport_final.Rmd")
trackdown::download_file(file = "scripts/Rapport_final.Rmd", gfile = "Rapport_final.Rmd")
<p align="center">
<img src="../figures/graphique.png" width = "110">
</p>
Code à mettre en corps de texte pour que ça run :
# Embed centré, html en ligne (ex: carte ODS)
<div align="center">
<iframe frameborder="0" width="800" height="600" src="lien/vers/mon/graphique"></iframe>
</div>
# HTML en local
<iframe src="Save_cartos/carte_p17_t1.html" height="600" width="1000" style="border: 0px solid #464646;" allowfullscreen="" allow="autoplay" data-external="1"></iframe>
Attention bien mettre “self_contained: false” dans le header. Ouvrir dans un browser pour voir le résultat.
Autre solution :
knitr::include_url(glue('../figures/tableau_global/tableau_global_{name_salarie}.html'))
Mettre ça dans <style>
en corps de
texte du RMD pour enlever la bordure noire.
iframe {
border: none;
}
Pimp my rmd, Yan Holtz
git reset HEAD~1
git revert HEAD
git config pull.rebase false
git stash push
git stash push scripts/tableau_de_bord.html #specific file
git stash # annuler le dernier commit
git stash push --include-untracked # supprimer tous les changements locaux (pull possible ensuite)
Quand message “needs merge” :
remotes
)# Intégrer dans .github/workflows/render-document.yaml
- uses: r-lib/actions/setup-r-dependencies@v2
with:
cache-version: 2
packages:
any::knitr
any::tidyverse
any::flexdashboard
any::remotes
- name: install icons package
run: |
Rscript -e "remotes::install_url('https://raw.githubusercontent.com/datactivist/plans_de_charge/main/.github/workflows/master.tar.gz')"
system(glue("cat {in_dir}/raw_data_{year}.jsonl | jq -c '{{doi, year, bso_classification, hal_id}}' | jq --slurp > {out_dir}/unnested_data_{year}.json"))
library(parallel)
library(doParallel)
library(foreach)
years <- 2013:2020
numCores <- 2
registerDoParallel(numCores)
foreach (year = years) %dopar% {
ma_fonction(year)
}
stopImplicitCluster()
https://fontawesome.com/search?o=r&m=free
https://jpswalsh.github.io/academicons/
fontawesome::fa("play", fill = "#0000CC")
, meilleure
solution.
icon_style(fontawesome("play", style = NULL), fill = "#0000CC")
,
implique ce chunk en début de Rmd :
#remotes::install_github("mitchelloharawild/icons", force = TRUE)
library(icons)
#download_fontawesome()
library(extrafont)
assign(glue("n_{year}"), n, envir = .GlobalEnv)
rm(ls = inter_bv, ratio, area_commune, area_2017)
tryCatch(ma_fonction(data), error = function(e) NULL)
source(here("functions", "match_commune.R"))
object <- memoise::memoise(match_commune, cache = memoise::cache_filesystem(here("cache")))
fonction <- function(data, variable){
data |> filter({{variable}} == 2)
}
fonction <- function(new_cols, cols){
data |> rename({{ new_cols }} := {{ cols }})
}
library(purrr)
purrr::map(.x = c(13:16, 18, 26:38, 40:44, 46, 49, 55, 66:68, 70, 84:90, 93:96, 99:108, 111:113, 115, 139, 162:178, 180, 181, 188:197),
.f = ~table_recap_simple(.x))
table_recap_stat <- rbind(lapply(ls(pattern="^all_stat_"), function(x) get(x))) |>
bind_rows()
datat <- data |> mutate(`NA` = ifelse("NA" %in% names(data), `NA`, "0%"))
auto_copy()
: ! x
and
y
must share the same src”Dans une fonction créée, lorsqu’une base de données est appelée sans être mise comme argument attendu, il faut l’ajouter comme argument de fonction.
inter_categories <- function(data, variable){
# Calcul
min_val <- min(data[[variable]], na.rm = TRUE) #data[[variable]] pour accéder à la variable dans un df
q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
med <- median(data[[variable]], na.rm = TRUE)
q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
max_val <- max(data[[variable]], na.rm = TRUE)
# Assignation à l'environnement global
assign("min_val", min_val, envir = .GlobalEnv) #nom d'objet "min_val" et pas "min" pour pas crééer de conflit avec le nom de fonction
assign("q1", q1, envir = .GlobalEnv)
assign("med", med, envir = .GlobalEnv)
assign("q3", q3, envir = .GlobalEnv)
assign("max_val", max_val, envir = .GlobalEnv)
}
inter_categories(table, "Densité (habitant par m²)") #nom de variable entre guillemets et pas backticks sinon est considéré comme objet à part et non une variable du jeu de données
# voir script ici [repo privé] : https://github.com/datactivist/plans_de_charge/blob/main/scripts/tableau_de_bord.Rmd
# ATTENTION : ne pas mettre de caractères spéciaux (&.) dans les noms data-navmenu=""
Document sous licence ouverte réalisé par Diane Thierry
diane@datactivist.coop