Ce document vise à rassembler les commandes utiles pour programmer en R.

Départ d’analyse

Import données

JSON

library(jsonlite)
decp_Lambersart <- fromJSON(txt = "../data/decp/decp_acheteur.json", flatten = T)
decp_Lambersart <- as.data.frame(decp_Lambersart$marches) |> 
  mutate(titulaires = map(titulaires, ~ mutate(.x, id = as.character(id)))) |> 
  unnest(cols = c(titulaires))

XML

library(XML)
library(httr)
data_19 <- xmlParse(content(GET("https://marchespublics596280.fr/app.php/api/v1/donnees-essentielles/contrat/xml-extraire-criteres/50286/a:1:%7Bi:0;i:0;%7D/1/2019/false/false/false/false/false/false/false/false/false", user_agent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36")), "text"), 
                      encoding = "UTF-8")
xml_df_19 <- xmlToDataFrame(nodes = getNodeSet(data_19, "//marche")) |> mutate(Year = 2019)

SAS

library(haven)
data <- read_sas("../data/mon_fichier.sas7bdat")

Multiple CSV into folder (import and rbind)

library(data.table)
rbindlist_fread <- function(path, pattern = "*.csv") {
    files = list.files(path, pattern, full.names = TRUE)
    data.table::rbindlist(lapply(files, function(x) fread(x)))
}
data <- rbindlist_fread("mon/super/path")

# Autre technique
fichiers <- list.files(path = "./data/out/datas/", pattern = "liste_urls_valides_.*\\.csv", full.names = TRUE)
test <- lapply(fichiers, read_csv)
donnees_combinees <- do.call(rbind, test)

ZIP

library(utils)
download.file("lien/vers/zip.zip", "dossier_complet.zip")
unzip("dossier_complet.zip")
data <- read_delim("dossier_complet.csv", ";", trim_ws = TRUE)

Google sheets

library(googlesheets4) 
data <- read_sheet("lien/vers/le/google/sheets")
# Attention : ne marche que si le tableau est en format GoogleSheet et pas Excel déposé sur Drive !!

Geo JSON

library(geojsonR)
library(httr)
temp_file <- tempfile(fileext = ".geojson")
GET("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/georef-france-commune/exports/geojson?lang=fr&refine=reg_name%3A%22Corse%22&facet=facet(name%3D%22reg_name%22%2C%20disjunctive%3Dtrue)&timezone=Europe%2FBerlin", write_disk(temp_file, overwrite = TRUE))
communes_contours_geo <- st_read(temp_file, quiet = TRUE)

API

Open Alex

parse_api_open_alex <- function(start, end){
    
    # Import des données : Works dataset, appels de l'API
    works_data <- purrr::map(
        .x = dois_bso[start:end,]$doi,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        possibly(.f = ~fromJSON(txt = paste("https://api.openalex.org/works/mailto:diane@datactivist.coop/doi:", .x, sep = ""), flatten = T), otherwise = NA_character_),
        .default = NA)
    
    # Aplatissement
        # sélection des 2 variables qui nous intéressent
    works_df <- purrr::map(
        .x = works_data,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        possibly(.f = ~unnest(data.frame(    # on récupère chaque élément/variable qui nous intéresse, on les met dans un df
                       doi = .x$doi, 
                       .x$authorships),
                   cols = "institutions", names_repair = "universal") |> select(doi, country_code), otherwise = NA_character_), 
        .default = NA)
        # suppression des NA et mise au format tabulaire
    works_df <- works_df[works_df !=  "NA"] # replace NA (DOIs non matchés avec OpenAlex) by NULL
    works_df <- rrapply(works_df, condition = Negate(is.null), how = "prune") #remove NULL
    works_df <- works_df |> bind_rows()
    
    # Export du df
    rio::export(works_df, glue("data/3.external/OpenAlex/french_CA/API_{start}_{end}.csv"))

}


### On applique la fonction pour 50 DOIs
parse_api_open_alex(1,50)

Web scraping

Easy scraping (data into table in 1 page)

library(rvest)
content <- read_html("url")
body_table <- content |> html_nodes('body')  |>
                    html_nodes('table') |>
                    html_table(dec = ",") 
data <- body_table[[1]]

Middle scraping (data into table in multiple pages)

library(rvest)
library(tidyverse)
data <- purrr::map(
        .x = (as.data.frame(rep(1:5, each = 1)) |> rename(page = `rep(1:5, each = 1)`))$page,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        .f = ~read_html(paste0("http://portal.core.edu.au/conf-ranks/?search=&by=all&source=all&sort=atitle&page=", .x)) |> html_nodes('body')  |> html_nodes('table') |> html_table(dec = ","), 
        .default = NA)
data <- bind_rows(data)

Difficult scraping (data in the body text in multiple pages)

# Code valable en janvier 2023, site a évolué maintenant
library(htm2txt)
core_millesime <- purrr::map(
        .x = (as.data.frame(rep(1:10, each = 1)) |> rename(page = `rep(1:10, each = 1)`))$page,
        .y = data.frame(matrix(ncol = 1, nrow = 1)),
        possibly(.f = ~ as.data.frame(gettxt(paste0('http://portal.core.edu.au/conf-ranks/', .x, '/'))) |> #import page par page
    rename(text = 1) |> 
    mutate(text = strsplit(as.character(text), "\n")) |> unnest(text) |> #split les éléments séparés par des "\n"
    filter(row_number() ==  10 | #nom de conférence
               grepl("Acronym:", text) ==  TRUE | 
               grepl("Source:", text) ==  TRUE | 
               grepl("Rank:", text) ==  TRUE, #champs que l'on garde
           grepl("DBLP", text) ==  FALSE) |> #retrait de la ligne contenant ce string
    mutate(text = case_when(row_number() ==  1 ~ paste("Title:", text), TRUE ~ text), #ajout du préfixe "titre:"
           champ = str_extract(text, "^[a-zA-Z0-9_]*"), #dans une nouvelle colonne ce qui est avant ":"
           value = str_extract(text, "(?< = : )[^\n]*")) |> #dans une nouvelle colonne ce qui est après ": "
    select(-text) |> t() |> row_to_names(row_number = 1) |> data.frame() |> #transpose puis 1ère ligne en nom de colonnes
    pivot_longer(cols = -c(Title, Acronym), names_to = "number", values_to = "value", names_prefix = "Source|Rank") |> # format long pour rank et source quand multiples
    mutate(col = case_when(row_number() %% 2 ==  0 ~ "rank",
                           row_number() %% 2 ==  1 ~ "source")) |> #
    pivot_wider(names_from = col, values_from = value) |> select(-number) |> mutate(core_id = .x), otherwise = NA_character_),
        .default = NA)

# Gestion des Na et mise au format tabulaire
core_histo <- core_millesime[core_millesime !=  "NA"] # replace NA by NULL
core_histo <- rrapply::rrapply(core_histo, condition = Negate(is.null), how = "prune") #remove NULL
core_histo <- core_histo |> bind_rows()

Packages

Install and/or load multiple packages

packages = c("tidyverse", "jsonlite", "glue", "parallel", "doParallel", "foreach")
package.check <- lapply(
  packages,
  FUN = function(x) {
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  }
)

Traitement de variables

Nettoyage de données

Noms de colonnes

Clean column names

library(janitor)
data <- data |> clean_names() # retire majuscules, espaces et caractères spéciaux

Clean specific column names

library(janitor)
clean_some_names <- function(dat, idx, ...) {
  names(dat)[idx] <- janitor::make_clean_names(names(dat)[idx], ...)
  dat
}
data <- data |> 
    clean_some_names(14:18)

Rename all columns with suffix except two

data <- data |> rename_at(vars(-Name, -State), ~ paste0(., '_2017'))

Order alphabetically columns

data <- data |> select(indicateurs, order(colnames(data)))

Order categorical column based on other categorical col

# Dans le cas où les niveaux changent, ex : "3 à 5 fois par semaine (12 commerces)"
table |> 
  mutate(discu_nb_commerces = factor(discu_nb_commerces, 
                                     levels = table |> 
                                       distinct(`Avez-vous l'habitude de discuter avec vos clients en dehors du cadre strict de la vente ? (exemple : prendre des nouvelles personnelles)`, discu_nb_commerces) %>%
                                       arrange(factor(`Avez-vous l'habitude de discuter avec vos clients en dehors du cadre strict de la vente ? (exemple : prendre des nouvelles personnelles)`, 
                                                      levels = c("Jamais", "Très rarement (moins d'une fois par mois)", "Rarement (1 à 2 fois par mois)", "Ponctuellement (1 à 2 fois par semaine)", "Régulièrement (3 à 5 fois par semaine)", "Très régulièrement (plusieurs fois par jour)"))) |> 
                                       pull(discu_nb_commerces)))

NAs

Fill NAs with previous values by group (repeat existing values)

data <- data |> 
    group_by(`Référence commande`) |> 
    fill(everything(), .direction = "downup") |> 
    ungroup()

Replace NA

# NAs d'une colonne par 0
data <- data |> mutate(col = replace_na(col, 0))

# NAs du df entier par 0
data <- data |> mutate_all(replace_na, 0)

# NAs par string
data <- data |> replace(is.na(.), "unknown")

# NAs par valeurs autre colonne
data <- data |> mutate(col_NA = coalesce(col_NA, col_replace))

Replace by NA

# Cellules vides par NAs
data <- data |> mutate_all(na_if, "")

# NULL par NAs
data <- data |> replace(. == "NULL", NA) 
data[data ==  "null"] <- NA

# Chiffres négatifs par NAs
data <- data |> mutate(col = replace(col, which(col<0), NA))

# Valeur par NAs sur certaines colonnes
data <- data |> mutate(across(starts_with("Choix_"), ~ na_if(.x, "Pas de préférence")))

Variables numériques

Round multiple columns

data <- data |> mutate_at(vars(var_3:var_17), ~round(.,0))

Round values to nearest 5

plyr::round_any(x, 5)

Shaping numeric variables

# Nombres arrondis au million
format(round(100000000 / 1e6, 1), trim = TRUE)

# Centaines et milliers séparés des virgules [comma]
format(as.integer(1000000, 0), nsmall = 1, big.mark = ".")

“Total” row per group

library(janitor)
data <- data |> 
    group_by(Var2) |> 
    group_modify(~ adorn_totals(.x, where = "row")) |> 
    ungroup() |> 
    arrange(fct_relevel(Commune, 'Total')) #puis trier avec ligne "Total" en haut puis ordre alphabétique sur colonne "Commune"

# Solving error "trying to re-add a totals dimension that is already been added"
data |> 
    untabyl() |> #ajouter avant le total
    adorn_totals()

Indices, `rep()`, `seq()`

Index rep

c(rep(1:5570, each = 50), rep(5571, each = 7))

Between groups index (not within)

data <- data |> 
    group_by(defi_profil) |> 
    mutate(groupNbr = cur_group_id())

Dates

Change date format

data <- mutate(date = format(as.Date(Date, format = "%Y-%m-%d %H:%M:%S"),"%d %B %Y")) #30 mai 2023
# autres formats : https://www.r-bloggers.com/2013/08/date-formats-in-r/

Dates subtraction

data <- data |> mutate(nb_weeks = round(as.numeric(difftime(fin, Sys.Date(), units = "weeks")), 0), #semaine
                       nb_month = round(as.numeric(difftime(fin, Sys.Date(), units = "weeks") /4.34524), 0)) #mois

Dates sum

library(mondate)
data <- data |> mutate(date_fin = as.mondate(date_debut) + duree) #duree en mois

Âge

Check intersections between intervals

data <- data |> 
    mutate(has_intersection = all(sapply(1:(n() - 1), function(i) {
      all(pmax(min[i], min[(i + 1):n()]) <= pmin(max[i]+3, max[(i + 1):n()]+3)) 
    })), .by = author_id) #où min est âge_min et max est age_max de la tranche

Create age intervals

data <- data |> mutate(tranche_age = cut(age, c(18,20, seq(30, 90, 5), 98)))

Find age from date of birth

data <- data |> mutate(age = round(as.numeric(difftime(Sys.Date(), dateOfBirth, units = "weeks")) / 52.1429, 0)) #année

Chaînes de caractères

Replace string

# Valeurs d'une colonne
data <- data |> mutate(col = str_replace_all(col, c("pattern1 | pattern2" = "replacement")))

# Attention à escape les parenthèses pour que le remplacement fonctionne
data <- data |> mutate(col = str_replace_all(col, c("string avec \\(parenthèses\\)" = "replacement")))

# Valeurs du df entier
data <- data |> mutate_all(function(x) gsub("pattern1 | pattern2", "replacement", x))

Replace character of multiple columns

data |> mutate_at(vars(January:December), ~str_replace(., ",", "."))

Upper string and remove special characters

data <- data |> mutate(col = stringi::stri_trans_general(str = gsub("-", " ", toupper(string)), id = "Latin-ASCII"))

Upper first letter of both words

library(tools)
toTitleCase(tolower("MY STRING"))
toTitleCase("my other string")

Upper first letter of first word

gsub("^(\\w)(\\w+)", "\\U\\1\\L\\2", "my other string", perl = TRUE)

Remove specific words/characters

data <- data |> mutate(col = removeWords(string, c("IEEE ", "ACM ", "SIAM ")))

Remove duplicated words

rem_dup_word <- function(x){
  #x <- tolower(x)
  paste(unique(trimws(unlist(strsplit(x, split = " ", fixed = F, perl = T)))), collapse = " ")
}
rem_dup_word(x)

Remove isolated letters/characters

data <- data |> mutate(col = gsub("\\W*\\b\\w\\b\\W*", " ", string))

Remove blank spaces at the begining (or the end)

data <- data |> mutate(col = trimws(string, which = "left"))

Remove blank spaces at the begining and at the end

data <- data |> mutate(col = str_squish(col)) #specific column
data <- data |> mutate_all(~str_squish(.)) #all character columns

Remove first digit if 0

data <- data |> mutate(num = gsub("^0", "", num))

Remove special characters (all)

# Supprimer les caractères spéciaux ex : ? ' !
data <- data |> mutate(col = str_replace_all(col, "[^[:alnum:]]", " "))

Keep first word

data <- data |> mutate(first_word = word(string, 1))

Extract digits

library(strex)
data <- data |> mutate(min = str_nth_number(string, n = 1)) # extrait le 1er chiffre du string

Extract year

data <- data |> mutate(annee = str_extract(`En quelle année ?`, "(1|2)\\d{3}")) #seulement "\\d{5}" pour zipCode

Extract n first characters

data <- data |> mutate(sub_string = substr(string, 1, n))

Extract n last characters

data <- data |> mutate(sub_string = substr(string, nchar(string)-n+1, nchar(string)))

Add digit identifiant

data <- data |> mutate(num = sprintf("%02d", num)) #passer de 1 à 2 digits
data <- data |> mutate(num = str_pad(num, 14, pad = "0")) #obliger à avoir 14 caractères (donc ajoute 0 en début si besoin)

Only 1 character in a string

nchar(string) ==  1

Regular expressions (regex)

Detect digits

str_detect(string, "[0-9]") ==  TRUE
grep("\\d+", string, value = TRUE)

Detect special characters (i.e. no letter nor digit)

grepl('[^[:alnum:]]', string)

Detect dates with format “%Y-%m-%d %H:%M:%S”

grepl("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", string)

Extract characters before first digit

str_extract(string, "^\\D+")

Extract characters before “:”

str_extract(string, "^[a-zA-Z0-9_]*")

Extract characters after “:”

str_extract(string, "(?<=: )[^\n]*")

Extract characters after “[”

str_extract(string, "(?<=\\[).*")
substr(Question, 1, nchar(Question)-1) #remove last character

Extract characters before “[”

str_extract("string bla [da]", "^.*\\[") #crochet inclu dans l'extraction

Extract characters before “(”

str_extract(string, "^[^(]+")

Extract characters before “,”

str_extract(string, "^[^,]+")

Extract characters between parenthesis

str_extract(string, "\\(.*?\\)")

Remove characters between parenthesis

str_remove(string,"\\([^)]*\\)")

Extract characters after blankspace

str_extract(string, "\\s+(.*)")

Extract all numbers and “+” character, and collapse into 1 cell

sapply(str_extract_all("+3214FSEtest!! 1", "[\\d+]+"), function(x) paste(x,collapse=""))

Replace misled zip code

# Reformater code postal mal renseigné (avec des espaces entre les 5 digits)
# Exemple : "49, avenue Du grau du rieu Marseillan Occitanie 34 340 France"
mutate(adresses = str_replace_all(adresses, "\\b(\\d{2}) (\\d{3})\\b", "\\1\\2"))

Extract domain name in URL

# Nom de domaine
mutate(domaine_url = str_extract(url, "(?<=^https?://)[^/]+"))
# Nom de domaine niveau 2 (jusqu'au 2è slash)
mutate(domaine_url2 = str_extract(url, "(?<=^https?://)[^/]+/[^/]+"))

Listes

Unlist

data <- data |>
    pull(column) |> pluck() |> bind_rows() |> 
    group_by(author_id) |> mutate(n = n()) |> select(author_id, n) |> distinct()

Complex unlist when simple pull/pluck is not working

# Exemple
comm1 <- data |> 
    filter(lengths(comments) != 0) |> 
    group_by(id) |> 
    mutate(nb_comments = nrow(comments[[1]])) |> 
    select(id2, id, comments, nb_comments) |> 
    pluck() |> bind_rows()
comm2 <- comm1 |>
    pull(comments) |> 
    pluck() |> bind_rows()
comm1.1 <- comm1 |> ungroup() |> 
    mutate(index = row_number()) |> 
    group_by(id) |> 
    slice(rep(1:n(), each = nb_comments)) |> 
    arrange(index)
proj_comm_date <- cbind(comm1.1, comm2) |> ungroup() |> 
    select(id2, author_id, publishedAt) |> 
    rename(date = publishedAt) |> mutate(type = "commentaires")

List to characters separated with comma for column in a dataframe

data |> 
    mutate(ma_col = sapply(ma_col, function(x) paste(unlist(x), collapse = ", ")))

All list columns to characters

data |> 
    mutate(across(where(is.list), ~ sapply(.x, function(y) paste(unlist(y), collapse = ", "))))

Filtres

Remove values containing string

data <- data |> filter(!grepl(',', column)) #containing comma

Keep values containing string

data <- data |> filter(grepl("mot particulier", column) ==  TRUE)
data <- data |> filter_all(all_vars(grepl("mot", .)))

Even and odd lines

data |> filter(row_number() %% 2 ==  0) # pair
data |> filter(row_number() %% 2 ==  1) # impair

All columns equal a value

data |> group_by(cat) |> filter(across(where(is.character), ~. != "N/A"))

At least 2 values by group

data |> 
    filter(any(projet %in% c("proj1234", "proj4321")), 
           .by = id)

Filter on first and second minimum values by group

data |> 
    filter((degre_etude == min(degre_etude) | degre_etude == min(degre_etude[degre_etude != min(degre_etude)])) |
           (degre_etude == min(degre_etude) | is.na(degre_etude)), 
         .by = id)

Select

Possibly select columns based on other df’s columns

data |> 
    select(any_of(names(raw_data)))

Traitement de dataframes

Formats

Pivot longer

m3 <- data |> select(c(BATIMENTS:TYPE_DE_BATIMENTS, starts_with("m3"))) |> 
    mutate_all(as.character) |> 
    pivot_longer(cols = -c(BATIMENTS:TYPE_DE_BATIMENTS), names_to = "Annee", values_to = "m3", names_prefix = "m3_")
montant <- data |> select(c(BATIMENTS:TYPE_DE_BATIMENTS, starts_with("montant"))) |> 
    mutate_all(as.character) |> 
    pivot_longer(cols = -c(BATIMENTS:TYPE_DE_BATIMENTS), names_to = "Annee", values_to = "montant", names_prefix = "montant_")
final <- cbind(m3, montant |> select(montant))

Pivot wider

data <- data |> 
    pivot_wider(names_from = choix, values_from = nb_interesses, names_prefix = "choix_")

Split comma separated values

# Split 
data <- data |> mutate(journal_issns = strsplit(as.character(journal_issns), ",")) |> unnest(journal_issns)
# Unsplit
data <- data |> mutate(journal_issns = paste0(unique(na.omit(journal_issns)), collapse = ","))

Repeat row based on 1 column number

data |> uncount(x)

Repeat row based on several column number

data.frame(col1 = c(0, 167, 73),
           col2 = c(62, 0, 73)) |> 
    slice(rep(1:n(), times = c(col2[1], col1[2], col2[3])))

Random values with specific probability

random <- c("groupe 1", "groupe 2", "groupe 3")
sample(random, size = nrow(data), replace = TRUE, prob = c(1/2,3/5,2/5))

Merge, join

Merge 2 or more columns into 1

data <- data |> mutate(new_col = coalesce(col1,col2,col3))

Fuzzy join

library(fuzzyjoin)
data <- stringdist_left_join(data, data2, by = "col_name", max_dist = 5, distance_col = "distance") |> 
  group_by(nom) |> slice_min(distance)

Merge 2 dataframes ~rbind()

data_merged <- merge(df_1, df_2, all = TRUE)

Alternative rbind() when different number of columns

data <- list(cat1, cat2) |>  bind_rows(.id = 'origine_df')

Differences 2 dataframes

anti_join(df1, df2)

Similarities 2 dataframes

semi_join(df1, df2)

Autre

Redundancy test

identical(data$id1, data$id2)

Increase total system memory R session (RAM)

library(unix) #pour linux
rlimit_as(1e20)  #increases to ~12GB

Sample random rows of a df

df[sample(nrow(df), 3), ] #pour récupérer 3 lignes

Analyse de données

Opérations, statistiques

NAs per column of a dataframe

# Une colonne
data |> count(is.na(col_name))

# Toute les colonnes
nb_NA <- as.data.frame(apply(is.na(data), 2, sum)) |> 
                       rename(`nombre de NA` = `apply(is.na(data), 2, sum)`) |>
                       mutate(pourcentage = `nombre de NA`/nrow(data)*100) |> 
                       mutate(pourcentage = round(pourcentage, 2)) |> 
                       arrange(desc(pourcentage)) |> 
    rownames_to_column() |> 
    rename(variable = rowname)

Non NAs per group

table <- data |> 
    summarise_all(list(~sum(!is.na(.))), .by = group)

Values frequency

data <- as.data.frame(table(data$column))  #R base
data <- data |> group_by(group) |> count(column)   #dplyr
data <- data |> summarise(n = n(), .by = group) #dplyr

Sum in pipe for specific rows

data |> mutate(new_cat = sum(n[Catégorie2 == "Total"])) #[]

Calculate sum accross mutliple columns df

data |> rowwise() |> mutate(sum_multiple = sum(c_across(var_3:var_17)))

Calculate difference between rows by group

data |> group_by(Structure) |> mutate(ecart = Pourcentage - lag(Pourcentage))

Weighted mean and median

# Fonction pour calculer une médiane pondérée
weighted_median <- function(x, w) {
  df <- data.frame(x = x, w = w) |>
    arrange(x)
  cum_w <- cumsum(df$w)
  cut_point <- sum(df$w) / 2
  median <- df$x[which(cum_w >= cut_point)[1]]
  return(median)
}

# Calculs pondérés
library(stats)
library(questionr)
data |> mutate(n_moyen_stats = round(weighted.mean(n_repondants, PONDFIN_logit), 2), #package stats
               n_moyen_questionr = round(questionr::wtd.mean(n_repondants, PONDFIN_logit), 2), #package questionr
               n_moyen_manuel = round(sum(n_repondants * PONDFIN_logit) / sum(PONDFIN_logit), 2), #calcul manuel
               n_median_pondere = weighted_median(n_repondants, PONDFIN_logit)) #médiane

Geocode and reverse geocode

# Geocoder (obtenir longitude et latitude à partir du code postal)
data_geoloc <- data |> 
    select(zip_code) |> 
    mutate(pays = "France") |> 
    na.omit() |> 
    geocode(postalcode = zip_code, country = pays, method = 'osm', lat = latitude , long = longitude)

# Reverse geocoder (obtenir l'adresse à partir de longitude / latitude)
data <- data_geoloc |> 
  reverse_geocode(lat = latitude, long = longitude, method = 'osm', full_results = TRUE)

Dataviz

Reorder values before plotting

# geom_bar ordre alphabetic, après arrange()
mutate(colonne = factor(colonne, levels = rev(unique(colonne)))) 
# geom_bar décroissant selon n
mutate(colonne = fct_reorder(colonne, n))

Arrange multiple plots

# Afficher plusieurs ggplots
library(gridExtra)
grid.arrange(g1,g2,g3, ncol = 3, nrow = 1, 
             top = grid::textGrob("Titre", gp = grid::gpar(fontsize = 15, font = 2)))

# Aligner les boxes
library(cowplot)
plot_grid(p3.1, p3.2, p3.3, p3.4, p3.5, align = 'vh')

# Afficher plusieurs ggplotlys
library(plotly)
subplot(plotly_positif, plotly_negatif, nrows = 1)

# Pas de message grid.arrange() dans rmd
graph <- grid.arrange(g1,g2)
grid::grid.draw(graph)

Interactive graph

# Passer en plotly
ggplotly(graph, tooltip = c("text")) |> 
    layout(xaxis = list(autorange = TRUE), yaxis = list(autorange = TRUE)) #auto adjust scale when click on element

# Passer en giraph : mettre les geométries en interactif !! ex: geom_segment_interactive()
graphc <- ggplot(data, aes(x = Réponses, y = Pourcentage, fill = Edition,
                              tooltip = paste0(Pourcentage*100, "% en ", Edition))) +#texte au survol
    geom_point_interactive() + 
    theme_minimal()
girafe(print(graph), width_svg = 15, height_svg = 12)

Highlight bar on stacked barplot with alpha

data |> 
  ggplot(aes(y = n)) +
  geom_col(aes(x = cycle, fill = rowname, alpha = cycle != "Catégorie"), color = "white", position = "stack", width = 0.7) +
  scale_alpha_manual(values=c(1, .4))

Highlight several bars on barplot with alpha

data_stat |> 
    mutate(a_surligner = case_when(type == "votes" ~ "1", type == "questionnaires"~ "1", .default = "0")) |> ungroup() |> 
    type_contrib("Les jeunes plébiscitent les outils de consultation vs. les outils de débat", "Fig. 31", 60, "") +
    geom_bar(aes(y=n, x=type, fill = a_surligner), position="dodge", stat="identity", width=.6) +
    geom_bar(aes(y=n_ref, x=type, linetype = "proportions de l'échantillon global"), 
             position="dodge", stat="identity", width=.6, color = "#666666", fill = NA, size = 1) +
    geom_label(aes(x = type, y = n+60, label = ecart), size = 5, fill = "white", label.size = NA) +
    scale_fill_manual(values = c("1" = "#83b4d1", "0" = "#cde1ec")) +
    guides(fill = "none")

Custom_theme() BBC

font <- "Helvetica"
custom_theme <- function (){
    font <- "Helvetica"
    ggplot2::theme(plot.title = ggplot2::element_text(family = font,size = 21, face = "bold", color = "#222222"), 
        plot.subtitle = ggplot2::element_text(family = font,size = 18, face = "italic", margin = ggplot2::margin(0, 0, 9, 0)), 
        plot.caption = ggplot2::element_text(family = font,size = 18, margin = ggplot2::margin(9, 0, 9, 0)), 
        plot.title.position = "plot",
        plot.caption.position = "plot",
        legend.title = ggplot2::element_text(family = font, size = 18, color = "#222222"), 
        legend.position = "top", 
        legend.text.align = 0, 
        legend.background = ggplot2::element_blank(),
        legend.key = ggplot2::element_blank(),
        legend.text = ggplot2::element_text(family = font, size = 18,color = "#222222"), 
        axis.text = ggplot2::element_text(family = font, size = 15,color = "#222222"), 
        axis.text.x = ggplot2::element_text(margin = ggplot2::margin(5,b = 10)), 
        axis.title = ggplot2::element_text(family = font, size = 18,color = "#222222"),
        axis.ticks = ggplot2::element_blank(),
        axis.line = ggplot2::element_blank(), 
        panel.grid.minor = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.x = ggplot2::element_blank(), 
        panel.background = ggplot2::element_blank(),
        strip.background = ggplot2::element_rect(fill = "white"),
        strip.text = ggplot2::element_text(size = 22, hjust = 0, face = "bold"))
}
custom_theme_largeG <- function (){
    font <- "Helvetica"
    ggplot2::theme(plot.title = ggplot2::element_text(family = font,size = 25, face = "bold", color = "#222222"), 
        plot.subtitle = ggplot2::element_text(family = font,size = 18, face = "italic", margin = ggplot2::margin(0, 0, 9, 0)), 
        plot.caption = ggplot2::element_text(family = font,size = 18, margin = ggplot2::margin(9, 0, 9, 0)), 
        plot.title.position = "plot", #titre commence où y-axis commencent !!
        plot.caption.position = "plot",
        legend.title = ggplot2::element_text(family = font, size = 18, color = "#222222"), 
        legend.position = "top", 
        legend.text.align = 0, 
        legend.background = ggplot2::element_blank(),
        legend.key = ggplot2::element_blank(),
        legend.text = ggplot2::element_text(family = font, size = 18,color = "#222222"), 
        axis.text = ggplot2::element_text(family = font, size = 24,color = "#222222"), 
        axis.text.x = ggplot2::element_text(margin = ggplot2::margin(5,b = 10)), 
        axis.title = ggplot2::element_text(family = font, size = 27,color = "#222222"),
        axis.ticks = ggplot2::element_blank(),
        axis.line = ggplot2::element_blank(), 
        panel.grid.minor = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"),
        panel.grid.major.x = ggplot2::element_blank(), 
        panel.background = ggplot2::element_blank(),
        strip.background = ggplot2::element_rect(fill = "white"),
        strip.text = ggplot2::element_text(size = 22, hjust = 0, face = "bold"))
}

# change grid when coord_flip()
theme(panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
      panel.grid.major.y = ggplot2::element_blank())

Par élément

Geometry

# Geométrie initiale
  geom_line(size = 1.7, alpha = 0.9, linetype = 1, color = "#0066CC") +
  geom_point(colour = "#0066CC", fill = "#0066CC", size = 2, pch = 21, stroke = 1.5) +
  geom_bar(position = position_dodge(.9), stat = "identity", width = .8, fill = "#21468d") + #.9 et width pour barres pas collées
  geom_bar(aes(x = forcats::fct_infreq(adequation))) + #fct_infreq pour ordonner selon count
  geom_col(position = "stack", width = 0.7, color = "white") +  coord_flip() + #cas particulier de geom_bar où on prend n comme Y et non count
  geom_text_wordcloud(family = "Montserrat") +
    
# Géométrie additionnelle
  geom_text(aes(y = 1, label = title_projet), hjust = "bottom", #aligner geom_text à gauche avec coord_flip
            fontface = "italic", size = 5, hjust = 0, lineheight = 0.8) + #lineheight pour régler l'interligne quand label sur plusieurs lignes
  geom_label(aes(y = 1, label = title_projet), hjust = "bottom", fontface = "italic", size = 2.6,
             fill = "white", label.size = NA, hjust = 0) + #white background and remove black borders à la fin sinon marche pas !
  geom_label(aes(y = 1, label = title_projet), hjust = "bottom", fontface = "italic", size = 2.6,
             fill = "white", label.size = NA, position = position_dodge(width = .9), hjust = 0) + #pour double barres plot (dodge)
  stat_count(geom = "text", colour = "white", size = 4,
             aes(label = ..count.., y = ..count..+.7), #y pour positionnement juste au dessus des barres
             position = position_stack(vjust = 0.5)) + #geom_text des geom_bar sans y
  geom_vline(xintercept = -.5, linetype = 2, color = "#0066CC") +
    
# Annotate a graph
coord_fixed(clip = 'off') # geom_label() déborde du graph

Scales

  xlim(1, 100) +
  scale_y_continuous(labels = scales::comma) + #grands chiffres lisibles
  scale_y_continuous(breaks = scales::pretty_breaks()) + #breaks réguliers, plus lisible (pas d'axe)
  scale_y_continuous(labels = scales::percent, limits = c(0,1)) + # pourcentages
  scale_y_discrete(limits = 1:12) + #valeurs discrètes
  scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) + #axis-text trop longs sur plusieurs lignes
  scale_color_continuous(high = "#132B43", low = "#56B1F7") #reverse color
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10), limits = c(-.2, 10)) # de 0 à 10 avec les breaks spécifiés
  scale_fill_manual(values = c("Non, mais c’est prévu dans les 12 prochains mois" = "#e1b44d", 
                             "Oui, au cours des deux dernières années" = "#323465", 
                             "Oui" = "#33bbc9"),
                  labels = c("Non, mais c’est prévu dans les 12 prochains mois" = "Prévue dans les 12 prochains mois", 
                             "Oui, au cours des deux dernières années" = "Engagée au cours des deux dernières années", 
                             "Oui" = "Engagée"), #rename categories legend
                  breaks = c("Oui", "Oui, au cours des deux dernières années", "Non, mais c’est prévu dans les 12 prochains mois")) #order items legend

Labs

# titres trop longs, automatiquement coupés
title = stringr::str_wrap("Exemple de titre très très très très très très très très très très très très long", width = 45)

# titres avec des mots colorés
library(ggtext)
ggplot(data) +
    geom_point() +
    labs(title = "Dans mon titre je veux mettre en avant <span style='color: #323465; font-size: 23pt;'>cette catégorie</span> par rapport aux autres") +
    theme(plot.title = element_markdown()) #coloré et taille plus grande

Theme

  theme_classic() +
  theme(panel.grid.major.y = ggplot2::element_line(color = "#cbcbcb"), #lignes horizontales fond graphique en gris (BBC thème)
        strip.text.x = element_text(face = "bold"), #label des facettes
        axis.title.x = element_text(margin = margin(t = 5, r = 0, b = 5, l = 0)), #augmenter marges entre texte et labels des axes
        plot.title = element_textbox_simple(hjust = 1), #hjust: titre aligné à droite, element_textbox_simple line break auto titre
        plot.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"), #background couleur Datactivist
        panel.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"),
        legend.background = element_rect(fill = "#FAF3EE", colour = "#FAF3EE"),
        legend.box = "vertical", legend.box.just = "left", #multiple guide_legend each one on new row, for top legend 
        ) +

Facet

  facet_grid(Projet ~ ., 
             scales = "free", #scales = "free" pour label différents d'une facette à une autre
             space = "free") + #space = "free" pour hauteur différentes selon le nombre d'éléments par facette
  facet_zoom(x = annee > 2014, split = TRUE) +
  ggforce::facet_col(facets = vars(Projet), 
                     scales = "free_y", 
                     space = "free") # pour avoir scales et face de facet_grid avec labels to the top de facet_wrap
  facet_wrap(~vote, scales='free_x') + scale_y_continuous(limits=c(0 ,50)) # pour avoir les ticks sur chaque facettes et pas juste celles du bas

Legend

  guides(fill = guide_legend(nrow = 6, byrow = TRUE,  # nombre d'éléments par ligne
                             title = "titre légende"), # titre légende
        lwd = "none",  #ne pas afficher une légende en particulier
        col = guide_legend(title = "", reverse = TRUE, override.aes = list(lwd = 2))) + #lwd = 2 pour ligne plus épaisse et plus visible dans la légende

# Deux légendes sur un même graph, affichées l'une sous l'autre
theme(legend.box = "vertical", legend.box.just = "left")

Colors

scale_fill_manual(values = c("#c898ae", "#da4729", "#f38337", "#74a466", "#fecf5d", "#5E79AC")) #couleurs Bauhaus

Graphiques

Donut

iris |> 
    group_by(species) |> summarise(Freq = n()) |> 
    mutate(fraction = Freq / sum(Freq),
           proportion = round((Freq / sum(Freq))*100),
           ymax = cumsum(fraction),
           ymin = c(0, head(ymax, n = -1)), 
           labelPosition = (ymax + ymin) / 2) |> 
    ggplot(aes(ymax = ymax, ymin = ymin, xmax = 3.3, xmin = 2, fill = species)) +
      geom_rect(col = "white", linewidth = 2) +
      geom_text(x = 4, aes(y = labelPosition, label = paste(proportion,"%",sep = "")), color = "#333333", size = 7) +
      geom_text(aes(x = 0, y = 0, label = sum(Freq)), col = "#333333", alpha = 0.8, size = 14, fontface = "bold", inherit.aes = FALSE) +
      scale_fill_manual(values = c('#fecf5d', '#21468d','#82888d')) +
      coord_polar(theta = "y") +
      labs(title = "Titre") +
      xlim(c(0, 4)) +   
      theme_void() +
      theme(legend.position = "right",
            plot.title = element_text(face = "bold"))+
      guides(fill = guide_legend(title = ""))

Pie chart

# Table
table <- iris |> 
  summarise(nb= n(), .by = species) |> na.omit() |> 
  mutate(Pourcentage = round(nb / sum(nb) * 100, 0)) |> 
  mutate(ypos = cumsum(nb) - .5 * nb)

# Dataviz
table |> 
    ggplot(aes(x = "", y = nb, fill = species)) +  
      geom_bar(stat = "identity", width = 1, color = "white", size = 2) +
      coord_polar("y") +
      #scale_fill_manual(values = c("Oui" = "#3182bd", "Non" = "#9ecae1")) +
      labs(title = str_wrap("Répartition de l'échantillon", width = 35)) +
      theme_classic() + 
      theme(legend.position = "top",
            strip.text.x = element_text(size = 12, face = "bold"),
            strip.background = element_blank(),
            axis.title = element_blank(),
            axis.ticks = element_blank(),
            axis.line = element_blank(),
            axis.text = element_blank(),
            legend.text = ggplot2::element_text(size = 18, color = "#222222"), 
            legend.title = ggplot2::element_text(size = 18, color = "#222222"), 
            text = element_text(family = "Montserrat", size = 12),
            plot.title = ggplot2::element_text(size = 19, face = "bold", color = "#222222"), 
            plot.caption = ggplot2::element_text(face = "italic", size = 18, 
                                                 color = "#666666", margin = ggplot2::margin(9, 0, 9, 0)), 
            plot.title.position = "plot", 
            plot.caption.position = "plot") +
      guides(fill = guide_legend(reverse = T, title = "")) +
      geom_text(aes(y = ypos, x = 2, label = paste(round(Pourcentage,0), "%", sep="")), 
                color = "#333333", size=5, check_overlap = T)

Histogram

iris |> 
    ggplot() +
      aes(x = sepal_length) +
      geom_histogram(bins = 7L, col = "white", fill = "#21468d", width = 5) +
      geom_vline(xintercept = mean(iris$sepal_length, na.rm = TRUE), linetype = 2, col = "red") +
      geom_text(aes(x = mean(sepal_length, na.rm = T), y = 1, label = paste("Moyenne :", round(mean(sepal_length, na.rm = T), 1))), 
                col = "red", fontface = "italic", hjust = 0, size = 5) +
      labs(x = "Valeur",y = "Fréquence", title = "Titre",
           subtitle = paste(iris |> filter(is.na(sepal_length)) |> nrow(), "valeur manquante")) +
      theme_classic() +
      theme(plot.subtitle = element_text(face = "italic"),
            plot.title = element_text(face = "bold")) +
      scale_x_continuous(n.breaks = 10)

Histogram on multiple variables

iris |> 
  #select(1, 37:40) |> 
  reshape2::melt(id.vars = c("species")) |> 
  ggplot(aes(x = value)) +
      geom_histogram(bins = 10L, col = "white", fill = "#21468d", width = 5) +
      labs(x = "Valeur", y = "Fréquence", title = "Titre") +
      theme_classic() +
      theme(plot.title = element_text(face = "bold")) +
      facet_wrap(variable ~ .)

Histogram with normal distribution

ggplot(data = table_femmes) + 
  geom_histogram(mapping = aes(x = percent, y=..density..), fill = "#fd710f", color = "white", alpha = .7) +
  stat_function(fun = dnorm, args = list(mean = mean(table_femmes$percent), sd = sd(table_femmes$percent)), 
                size = 1, alpha = .8, aes(col = "Distribution normale")) +
  scale_colour_manual("", values = c("#cb1d27")) +
  labs(title = "Distribution de la part des femmes dans les cohortes", x = "Pourcentage de femmes", y = "Densité") +
  theme_classic() +
  scale_x_continuous(labels = scales::percent, limits = c(0,1)) +
  geom_vline(xintercept = .5, linetype = 2, col = "#666666", linewidth = .7) +
  custom_theme() +
  theme(plot.title = ggplot2::element_text(size = 18))

Barplot ordering bars with geom_bar()

iris |> 
    filter(sepal_width >= 3) |> 
    ggplot() +
      aes(x = reorder(species, species,
                         function(x)+length(x))) + #+ pour descendant, - pour ascendant
      geom_bar(fill = "#3182BD") +
      coord_flip() +
      theme_minimal() +
      theme(panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
            panel.grid.major.y = ggplot2::element_blank(),
            axis.title.y = element_blank())

Pyramid

library(lemon)
iris |> 
  filter(species !=  "setosa") |> 
  mutate(tranche_pw = cut(petal_width, c(0.1, 0.5, seq(1, 2.5, .5)))) |> 
  group_by(tranche_pw, species) |> 
  summarise(Freq = n()) |> ungroup() |> group_by(species) |> 
  mutate(percent = Freq / sum(Freq) *100,
         percent = ifelse(percent < 0.5, round(percent, 1), round(percent, 0))) |> 
  na.omit() |> 
  ungroup() |> 
  ggplot(mapping = aes(x = ifelse(species ==  "versicolor", -Freq, Freq), y = tranche_pw, fill = species)) +
      geom_col(col = "white", size = 1.3) + #col = "black"
      geom_text(aes(y = tranche_pw, x = ifelse(species ==  "versicolor", -Freq-1, Freq+1), label = paste(percent,"%",sep = "")), 
                color = "#333333", size = 3, check_overlap = T) +
      scale_x_symmetric(labels = abs) + 
      scale_colour_manual(values = c('#fecf5d', '#21468d'),
                          aesthetics = c("colour", "fill")) +
      labs(x = "Fréquence", y = "", title = "Titre") +
      theme_classic() + 
      theme(legend.position = "right",
            plot.title = element_text(face = "bold"),
            axis.title.y = element_blank(),
            axis.line.y = element_blank(),
            axis.ticks.y = element_blank()) +
      guides(fill = guide_legend(title = ""))

Bar

iris |> 
    group_by(species) |> 
    summarise(n = n()) |> ungroup() |> na.omit() |> 
    mutate(percent = round((n / sum(n))*100, 0)) |> 
    mutate(species = fct_reorder(species, n)) |> 
    ggplot(aes(x = species, y = n))+
        geom_bar(stat = "identity", width = .6, fill = "#21468d") +
        geom_text(aes(y = n+3, x = species, label = paste(percent,"%",sep = "")), 
                color = "#333333", size = 3, check_overlap = T) +
        coord_flip() +
        labs(x = "", y = "Fréquence", title = "Titre") +
        scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 70)) + #axis-text trop longs sur plusieurs lignes
        theme_classic() +
        theme(plot.title = element_text(face = "bold"),
              axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)))

Radar

devtools::install_github("ricardo-bion/ggradar")  #installation impossible
library(ggradar)
iris |> 
  select(sepal_length, sepal_length:petal_width) |> 
  summarise_all(funs(mean(., na.rm = T))) |> 
  ggradar(values.radar = c("1", "3", "5"),
          grid.min = 1, grid.mid = 3, grid.max = 5,
          # Polygones
          group.line.width = 1, 
          group.point.size = 3,
          group.colours = "#21468d",
          # Arrière-plan et lignes de grille
          background.circle.colour = "white",
          gridline.mid.colour = "grey") +
  xlim(-10,10) + #selon longueur catégories
  labs(title = "Titre") +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold"))

Comparison gauge

comp_data <- data.frame("variable" = c("sepal_length", "sepal_width", "petal_length", "petal_width"),
           "Minimum" = c(min(iris$sepal_length), min(iris$sepal_width), 
                         min(iris$petal_length), min(iris$petal_width)),
           "Maximum" = c(max(iris$sepal_length), max(iris$sepal_width), 
                         max(iris$petal_length), max(iris$petal_width)),
           "Moyenne" = c(mean(iris$sepal_length), mean(iris$sepal_width), 
                         mean(iris$petal_length), mean(iris$petal_width)))
comp_data |> 
  arrange(variable) |> 
  mutate(variable = factor(variable, levels = rev(unique(variable)))) |> 
  ggplot() +
    geom_segment(aes(x = Minimum, xend = Maximum, y = variable, yend = variable, colour = variable), lwd = 10) +
    geom_segment(aes(x = Moyenne, xend = Moyenne+.05, y = variable, yend = variable), 
                 colour = ifelse(comp_data$variable ==  "sepal_length", "white", "black"), lwd = 10) +
    geom_text(aes(x = Moyenne+.2, y = variable, label = paste("Moyenne :", round(Moyenne, 1)), hjust = "bottom"), 
              size = 3, col = ifelse(comp_data$variable ==  "sepal_length", "white", "#333333")) +
    labs(title = "Titre", x = "Score", y = "") +
    scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) +
    scale_color_manual(values = c("petal_length" = "#21468d", 
                                  "sepal_width" = "#fecf5d", 
                                  "sepal_length" = "#d55152", 
                                  "petal_width" = "#ddd6ad")) +
    theme_bw() +
    theme(legend.position = "none", 
          plot.title = element_text(face = "bold"))

Infographie simple

library(icons)
library(tidyverse)
# Table
df <- data.frame(
    x = c(2, 8.5, 15, 21.5),
    y = rep(6.5, 4),
    h = rep(4.25, 4),
    w = rep(6.25, 4),
    value = c(5, 7, 17, 5),
    info = c("Communes",
             "Participants",
             "Jeux de données \nouverts",
             "Réutilisations"),
    color = factor(1:4)
)

# Graphique
ggplot(df, aes(x, y, height = h, width = w, label = info)) +
    ## Create the tiles using the `color` column
    geom_tile(aes(fill = color)) +
    ## Add the numeric values as text in `value` column
    geom_text(color = "white", fontface = "bold", size = 16,
              aes(label = ifelse(value > 999, format(as.integer(value, 0), nsmall = 1, big.mark = "."), value), 
                  x = x, y = y + 0.7), 
              hjust = 0.5) +
    ## Add the labels for each box stored in the `info` column
    geom_text(color = "white", fontface = "bold", size = 10,
              aes(label = info, x = x - 2.9, y = y - 1), hjust = 0) +
    coord_fixed() +
    #scale_fill_manual(type = "qual", palette = "Dark2") +
    scale_fill_manual(values = c("#9bcea4", "#ef7875", "#ffcc00", "#23ae84", "#fecf5d", "#21468d")) +
    ## Use `geom_text()` to add the icons by specifying the unicode symbol.
    theme_void() +
    guides(fill = FALSE)

Correlation matrix with ggplot

    # couleurs
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
bgcolors <- matrix("black", nrow(matrix), ncol(matrix),dimnames = dimnames(matrix))
bgcolors[,2] <- "red"
bgcolors <- bgcolors[lower.tri(bgcolors, diag=TRUE)]
    # matrice
ggcorrplot(matrix, hc.order = T, type = "lower", show.diag = TRUE, legend.title = "",
           lab = TRUE, lab_col = bgcolors, colors = c("#BB4444", "white", "#4477AA")) +
    labs(title = stringr::str_wrap("Décorrelation entre la phase <span style='color: red;'>Analyse, traitement, calcul</span> et les autres phases du cycle de vie", width = 50), 
         subtitle = "Les phases de dépôt et de partage fortement connectées entre elles", 
         caption = "Sur quelle(s) partie(s) du cycle de vie des données intervenez-vous ? - 5642 réponses") +
    theme(axis.text.y = element_blank(),
          panel.grid = element_blank(),
          plot.subtitle = element_markdown(size = 15),
          plot.title = element_markdown(size = 18, lineheight = 2),
          plot.caption = element_text(size = 15, face = "italic", hjust = 0, color = "#cbcbcb")) +
    geom_label(aes(x = 6, y = 7), label = "Dépot", family = font, vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 5, y = 6), label = "Préparation du dépôt", family = font, vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 4, y = 5), label = "Ouverture ou partage", family = font, vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 3, y = 4), label = "Planification, PGD", family = font, vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 2, y = 3), label = "Réutilisation", family = font, vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 1, y = 2), label = "Analyse, traitement, calcul", family = font, vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    geom_label(aes(x = 0, y = 1), label = "Collecte, création, stockage", family = font, vjust = .4, hjust = "right", 
               fill = "white", label.size = NA) +
    coord_fixed(clip = 'off')

Positive and negative evolutions

test <- table |> 
  mutate(ecart = ecart/100,
         type_ecart = case_when(ecart >= 0 ~ "positif",
                                ecart < 0 ~ "negatif",
                                .default = NA_character_)) |> 
  select(Structure, Réponses, ecart, type_ecart) |> na.omit() 
test |> 
  ggplot(aes(x = Réponses, y = ecart, fill = type_ecart)) +
    geom_bar(stat="identity", width=.6, size = 2, alpha = .7) +
    coord_flip() +
    labs(x = "", y = "Écart en points de pourcentage", 
         subtitle = "Évolution des réponses entre l'édition 2022 et l'édition 2023",
         title = stringr::str_wrap("Commençons par la protection de la vie privée. Considérez-vous que votre structure est en conformité avec le RGPD ?", width = 55)) +
    scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 40)) + 
    scale_y_continuous(labels = scales::percent) + # pourcentages
    scale_fill_manual(values = c("positif" = "#288a28", "negatif" = "#a81008")) +
    theme_classic() +
    custom_theme() +
    theme(legend.position = "none",
          panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
          panel.grid.major.y = ggplot2::element_blank(),
          plot.subtitle = element_markdown()) +
    geom_label(aes(x = Réponses, y = ecart, label = paste0(ecart*100, "%")), 
              color = "#333333", size = 4, fill = "white", label.size = NA, 
              hjust = ifelse(test$ecart >= 0, 0, 1))

Cartography leaflet (points)

library(leaflet)
library(htmltools)

    # titre
tag.map.title <- tags$style(HTML("
  .leaflet-control.map-title { 
    transform: translate(0%,-170%);
    left: 7%;
    text-align: center;
    padding-left: 10px; 
    padding-right: 10px; 
    font-weight: bold;
    font-size: 22px;
    color: black;
  }
"))
title <- tags$div(tag.map.title, HTML("Localisation des participants aux projets (hors ZFE)"))

    # carte
proj_loin |> 
  leaflet() |>
  addTiles() |> 
  addControl(title, position = "topleft", className = "map-title") |> 
  setView(lng = 3, lat = 47, zoom = 4.8) |> 
  addCircles(radius = proj_loin$nb_contrib, lng = proj_loin$longitude, lat = proj_loin$latitude, #color = proj_loin$col, 
             weight = 1, opacity = proj_loin$nb_contrib, fillOpacity = .1, 
             label = proj_loin$nom_commune,
             popup = paste0(proj_loin$nom_commune, ", ", format(as.integer(proj_loin$nb_contrib, 0), nsmall = 1, big.mark = "."), 
                            ifelse(proj_loin$nb_contrib <= 1, " contribution dont :\n", " contributions dont :\n"),
                           "<br>• ", format(as.integer(proj_loin$questionnaires, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$questionnaires <= 1, " questionnaire", " questionnaires"),
                           "<br>• ", format(as.integer(proj_loin$propositions, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$propositions <= 1, " proposition", " propositions"),
                           "<br>• ", format(as.integer(proj_loin$commentaires, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$commentaires <= 1, " commentaire", " commentaires"),
                           "<br>• ", format(as.integer(proj_loin$votes, 0), nsmall = 1, big.mark = "."), ifelse(proj_loin$votes <= 1, " vote", " votes"))) |> 
  addProviderTiles(provider = "Esri.WorldGrayCanvas")
# all providers : http://leaflet-extras.github.io/leaflet-providers/preview/index.html

    # Carte avec les points qui se précisent au fur et à mesure du zoom
test |> 
  leaflet() |>
  addTiles() |> 
  setView(lng = 3, lat = 47, zoom = 4.8) |> 
  addMarkers(lng = test$long, lat = test$lat, 
             label = test$Concessionnaires_Dénominationsociale,
             clusterOptions = markerClusterOptions()) |> #clusterOptions fait cet effet
  addProviderTiles(provider = "Esri.WorldGrayCanvas")
saveWidget(m, file="m.html")

Cartography mapview (choropleth)

# Chargement des données
library(geojsonR)
library(httr)
temp_file <- tempfile(fileext = ".geojson")
GET("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/georef-france-commune/exports/geojson?lang=fr&refine=reg_name%3A%22Corse%22&facet=facet(name%3D%22reg_name%22%2C%20disjunctive%3Dtrue)&timezone=Europe%2FBerlin", write_disk(temp_file, overwrite = TRUE))
communes_contours_geo <- st_read(temp_file, quiet = TRUE)

# Carto
library(mapview)
library(leafpop)
library(sf)
communes_contours_geo |> 
    select(dep_name, geometry) |> 
    mutate(dep_name = as.character(dep_name)) |> # ÉTAPE IMPORTANTE SINON "ERROR NON-NUMERIC ARG"
    na.omit() |> 
    st_as_sf() |> 
    mapview(zcol = "dep_name",
            layer.name = "Communes de Corse",
            legend = TRUE, 
            basemaps.color.shuffle = FALSE, map.types = "CartoDB.Positron",
            col.regions = c("Corse-du-Sud" = "#b5dbfb", "Haute-Corse" = "#1d82df"),
            popup = popupTable(communes_contours_geo, zcol = c("dep_name")))

Combinaison points and choropleth carto

library(mapview)
library(leafpop)
library(leaflet)
map <- mapview(data1$geometry, 
        alpha.regions = .5, 
        col.regions = "#CC0000", #fill color zones géo
        lwd = 2, #épaisseur bordure zones géo
        layer.name = "Territoires d'étude", #nom s'affichant en bas à droite
        label = data1$com_name, 
        popup = popupTable(data1, zcol = c("com_name")),
        legend = TRUE) +
mapview(st_as_sf(na.omit(data_geoloc2), coords = c("longitude", "latitude"), crs = 4326),
        layer.name = "Commerces répondants",
        col.regions = "#3182BD", 
        alpha.regions = .5,
        cex = "n", 
        popup = data_geoloc2$nb_commerces, #texte quand on clique sur un point
        label = data_geoloc2$nb_commerces, #texte au survol de la souris
        legend = FALSE)

# Conversion en objet leaflet pour ajouter un titre
leaflet_map <- map@map %>% 
  addControl(
    html = "<h2 style='text-align:center; color: darkblue;'>Répartition géographique des commerces répondants</h2>",
    position = "topright"
  )
leaflet_map

Legend out of graph

# Légende
graph_leg <- table_all |> 
filter(Structure == "Une commune" | Structure == "Un EPCI" | Structure == "Un département" | 
       Structure == "Une région" | Structure == "Autre") |> 
mutate(Structure = factor(Structure, levels = c("Autre","Une région","Un département","Un EPCI","Une commune"))) |> 
ggplot(aes(x = Structure, y = Pourcentage, fill = Réponses,
           text = paste0(Pourcentage*100, "% de réponses '", Réponses, "'"))) + 
  geom_bar(position = "fill", color = "white", stat="identity", alpha = .9) +
  labs(x = "", y = "Pourcentage de réponses", title = "Par type de collectivité") +
  scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
  scale_fill_manual(values = c("grey", "#bf0001", "#e1b44d", "#33bbc9", "#323465", "#616165", "#0a0a14")) +
  facet_wrap(~Edition) +
  coord_flip() +
  theme_classic() +
  custom_theme() +
  guides(fill = guide_legend(ncol = 1, title = "", reverse = T)) +
  theme(legend.position = "top",
            panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
            panel.grid.major.y = ggplot2::element_blank())
legend <- get_legend(graph_leg + theme(legend.box.margin = margin(0, 0, 10, 12)))

Dual axis on ggplot

table |> 
    mutate(timestamp = as.Date(as.character(paste0(timestamp, "-01")))) |> 
  ggplot(aes(x = timestamp
             #text = paste(n_cumule, "inscrits en", format(timestamp, "%b %Y"))
             )) +
  geom_line( aes(y=n_cumule), size=1, alpha=0.9, color = "#3366CC") + 
  geom_line( aes(y=n_cumule_contrib/3.785737), size=1, alpha=0.9, color = "#CC0000") +
  labs(x = "Temps", y = "Nombre d'inscrits", 
       title = stringr::str_wrap("Évolution du nombre d'inscrits et du nombre de contributions cumulés sur la plateforme depuis son lancement", width = 40)) +
  scale_y_continuous(name = "Nombre total d'inscrits",
    sec.axis = sec_axis(~ . * 3.785737, name = "Nombre total de contributions")) +  #scale_x_date(date_labels = "%Y %b") +
  theme_classic() +
  custom_theme() +
  theme(legend.position = "right",
        axis.title.y = element_text(color = "#3366CC"),
        axis.title.y.right = element_text(color = "#CC0000"))

Treemap

library(treemap)
library(treemapify)
table |> 
  filter(row_number() <= 10) |> 
  ggplot() +
      geom_treemap(aes(area = effectif_couple_spe, fill = couple_spe), col = "white", size = 4) +
      geom_treemap_text(aes(area = effectif_couple_spe, fill = couple_spe, 
                            label = paste0(couple_spe, "\n(", percent, "%)")),
                        colour = "white", place = "centre", size = 15, grow = TRUE) +
      scale_fill_manual(values = c("#345E68", "#FEDEA0", "#B7C2A5", "#023743","#7A9BB1", "#B8AA75", "#7B8598", "#345B48", "#476F84", "#D0BA7C")) +
      labs(title = "Les 10 principales spécialités au BAC choisies par les étudiants d'Ile-de-France") +
      custom_theme() +
      theme(legend.position = "none")

Customized lolipop chart

library(ggtext)
table |> 
    mutate(var = fct_reorder(var, n)) |> 
    ggplot( aes(x=var, y=n)) +
    geom_segment(aes(xend=var, yend=0), 
                 color=ifelse(table_collab_treemap$var == "Associations de quartier", "#3182BD", "grey"), 
                 size=ifelse(table_collab_treemap$var == "Associations de quartier", 1.3, 0.7)) +
    geom_point(color=ifelse(table_collab_treemap$var == "Associations de quartier", "#3182BD", "grey"), 
               size=ifelse(table_collab_treemap$var == "Associations de quartier", 5, 2)) +
    theme_bw() +
    coord_flip() +
    labs(title = "Les <span style='color: #3182BD'>associations de quartier</span> sont les principales <br>associations avec lesquelles les commerçants <br>s'associent", #str_wrap() ne marche plus avec le element_markdown()
         subtitle = "Collaborations des commerçants avec des associations \nlocales",
         y = "Nombre de commerces") +
    scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 30)) +
    custom_theme() +
    theme(legend.position = "none", 
          axis.title.y = element_blank(),
          panel.grid.major.x = ggplot2::element_line(color = "#cbcbcb"),
          panel.grid.major.y = ggplot2::element_blank(),
          plot.title = element_markdown()) +
    ggplot2::annotate("text",
             x = grep("Associations de quartier", table_collab_treemap$var),
             y = table_collab_treemap$n[which(table_collab_treemap$var=="Associations de quartier")]*1.1,
             label = paste0(round(percent_collab_quartier$percent, 0), "% des collaborations \nse font avec des \nassociations de quartier"),
             color = "#3182BD", size=4 , angle=0, fontface="bold", hjust=0)

Tables

`gttable()`

library(gt)
library(gtExtras)
data |> 
    # Transformation de la colonne externalité pour mettre le picto
    mutate(` ` = case_match(Externalité,
                                    "Lien social" ~ "images/lien_social.png",
                                    "Solidarités" ~ "images/solidarites.png",
                                    "Vie de quartier" ~ "images/vie_quartier.png",
                                    "Santé et sécurité" ~ "images/sante_securite.png",
                                    "Environnement" ~ "images/environnement.png",
                                    "Espace public" ~ "images/espace_public.png")) |> 
    relocate(` `) |> 
    # Ordre des lignes et groupes
    mutate(Externalité = factor(Externalité, 
                                levels = c("Lien social","Solidarités","Vie de quartier","Santé et sécurité","Environnement","Espace public"))) |> 
    mutate(ordre = ifelse(Indicateur == "% de commerces combinant les 3 externalités", 2, 1)) |> 
    arrange(Externalité, ordre) |>  # Tri par externalité, puis indicateur
    select(-ordre) |> 
    # GT TABLE
    gt(groupname_col = "Externalité") |> 
    # Titres
    tab_header(title = md("**Comparaison territoriales des externalités positives du commerce**")) |>
    tab_source_note(source_note = md("*Données d'une enquête diffusée d'avril à novembre 2024 auprès de **324 commerces** de France entière.*")) |> 
    # Rassemblement des chiffres par territoires
    tab_spanner(label = md("**Paris**"), columns = c(`Nombre de réponses`, `%`)) |> 
    tab_spanner(label = md("**St Ouen**"), columns = c(` Nombre de réponses`, ` %`)) |> 
    tab_spanner(label = md("**Rouen**"), columns = c(`Nombre de réponses `, `% `)) |> 
    # Stype de la table
    tab_style(style = list(cell_text(weight = "lighter")), 
              locations = cells_body(columns = Indicateur)) |>  
    # Couleur des indicateurs récapitulatifs
    tab_style(style = list(cell_fill(color = "lightgrey", alpha = 1)),
              locations = cells_body(columns = everything(), 
                                     rows = Indicateur == "% de commerces combinant les 3 externalités")) |> 
    # Intégration des pictos externalités
    text_transform(locations = cells_body(columns = " "),
                   fn = function(x) {
                      local_image(
                        filename = x,
                        height = 27)
                    }) |> 
    cols_width(` ` ~ "5%") |> 
    # % en barres
    gt_plt_bar_pct(`%`, scaled = TRUE, labels=TRUE, decimals = 0, 
                   font_size = "14px", fill = "#343333", height = 20) |> 
    gt_plt_bar_pct(` %`, scaled = TRUE, labels=TRUE, decimals = 0, 
                   font_size = "14px", fill = "#343333", height = 20) |> 
    gt_plt_bar_pct(`% `, scaled = TRUE, labels=TRUE, decimals = 0, 
                   font_size = "14px", fill = "#343333", height = 20) |> 
    # couleur des noms de groupes (familles d'externalités)
    tab_style(style = list(cell_fill(color = "#B4B1B1", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Lien social")) |> 
    tab_style(style = list(cell_fill(color = "#004654", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Solidarités")) |> 
    tab_style(style = list(cell_fill(color = "#0097B2", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Vie de quartier")) |> 
    tab_style(style = list(cell_fill(color = "#00A589", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Santé et sécurité")) |>
    tab_style(style = list(cell_fill(color = "#E1B441", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Environnement")) |>
    tab_style(style = list(cell_fill(color = "#FF5757", alpha = .4),
                           cell_text(weight = "bold")),
              locations = cells_row_groups(groups = "Espace public")) |> 
    tab_style(style = list(cell_borders(sides = c("t", "b"), color = "white", weight = px(2))),
              locations = cells_row_groups()) |> 
    # centrage des colonnes
    cols_align(align = "center", columns = c(`Nombre de réponses`, `%`, ` Nombre de réponses`, ` %`, `Nombre de réponses `, `% `)) |> 
    # retirer la ligne horizontale au-dessus du titre
    tab_options(table.border.top.style = "none", 
                table.border.top.width = px(0)) |>    
    # table dans une boxe avec barre de défilement verticale
    tab_options(table.width = "100%",         # Largeur de la table
                container.overflow.x = "auto", # Scroll horizontal si nécessaire
                container.overflow.y = "auto", # Scroll vertical si nécessaire
                container.height = px(600)) |>     # Hauteur fixe avec défilement vertical
    # fixe les noms de colonnes et titre 
    opt_css(css = "
      /* Fixer le titre */
      .gt_title, .gt_subtitle {
        position: sticky;
        top: 0; 
        background-color: #f9f9f9; /* Couleur de fond pour le titre */
        z-index: 2;               /* Met le titre au-dessus */
        padding: 5px;             /* Ajoute un peu de marge interne */
      }
      
      /* Fixer les noms des colonnes */
      thead th {
        position: sticky;
        top: 30px;                /* Ajuste en fonction de la hauteur du titre */
        background-color: #ffffff; /* Couleur de fond pour l'en-tête */
        z-index: 1;               /* Met les noms de colonnes devant les lignes */
      }")

`DT::datatable()`

datatable(data, options = list(pageLength = 5, scrollX = TRUE))

`knitr::kable()`

knitr::kable(stat_indiv, format = "html") |> 
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))

`reactable()`

Colorize cells based on categorical column

library(reactable)

# Fonction pour colorer les catégories
category_styles <- function(Catégorie) {
  case_when(
    Catégorie == "Faible" ~ list(background = "#e3f2fd", color = "black"),
    Catégorie == "Moyen" ~ list(background = "#b5dbfb", color = "black"),
    Catégorie == "Élevé" ~ list(background = "#1d82df", color = "white"),
    Catégorie == "Très élevé" ~ list(background = "#0d47a1", color = "white"),
    Catégorie == "NA" ~ list(background = "#CCCCCC", color = "black"),
    TRUE ~ list(background = NA, color = NA)
  )
}

# Affichage de la table
table |> 
    reactable(columns = list(Catégorie = colDef(style = function(value){category_styles(value)})))

Colorize all columns except one based on numerical column

# Indicateurs en une table
table <- data.frame(
  Nom = c("nom1", "nom3", "nom3"),
  Score = c(85, 92, 78),
  Age = c(25, 30, 22),
  Height = c(160, 175, 168)
)

# Fonction pour colorer les indicateurs
indicateur_styles <- function(Indicateur) {
    # calcul des statistiques
  q1 <- quantile(Indicateur, 0.25, na.rm = TRUE)
  med <- median(Indicateur, na.rm = TRUE)
  q3 <- quantile(Indicateur, 0.75, na.rm = TRUE)
    # conditions de mise en forme
  sapply(Indicateur, function(value) {
    dplyr::case_when(
      value <= q1 ~ "background-color: #e3f2fd; color: black;",
      value > q1 & value <= med ~ "background-color: #b5dbfb; color: black;",
      value > med & value <= q3 ~ "background-color: #1d82df; color: white;",
      value > q3 ~ "background-color: #0d47a1; color: white;",
      TRUE ~ "background-color: #CCCCCC; color: black;"
    )
  })
}

# Liste des colonnes sur lesquelles appliquer le style
column_defs <- setNames(
  lapply(names(table), function(col) {
    colDef(
      style = if (col != "Nom") {
        function(value, index, name) {
          indicateur_styles(table[[col]])[index]
        }
      } else {
        NULL
      }
    )
  }),
  names(table)
)

# Affichage de la table
table |> 
    reactable(columns = column_defs, defaultPageSize = 5)

Freeze first column

# Fixer la première colonne
data |> reactable(columns = list(ID = colDef(sticky = "left")))

# Fonction pour fixer la première colonne en plus d'appliquer aux autres colonnes la mise en forme des couleurs
column_defs <- setNames(
  lapply(names(table), function(col) {
    if (col != "ID") {
      colDef(
        style = function(value, index, name) {
          indicateur_styles(table[[col]])[index]
        }
      )
    } else {
      colDef(
        sticky = "left"
      )
    }
  }),
  names(table)
)

Export reactable as HTML and JPG

# Export de la table
library(htmlwidgets)
library(webshot2)
saveWidget(widget = table_rea, file = "../mon_path/data.html", selfcontained = TRUE)
webshot2::webshot("../mon_path/data.html", "../mon_path/data.jpg", 
                  vwidth = 1200, vheight = 600)

Fin d’analyse

Export

Export CSV

rio::export(data, "~/Downloads/tableau.csv")
write.csv(data, "~/Downloads/tableau.csv", row.names = FALSE, fileEncoding = "UTF-8")

Export plots

saving_plot <- function(graph, name, width, height) {
  ggsave(file = glue("~/Downloads/SVG/{name}.svg"), plot = graph, width = width, height = height)
  ggsave(file = glue("~/Downloads/PNG/{name}.png"), plot = graph, width = width, height = height)
}
saving_plot(graph, "histogram", 9, 5)

Export html objects (leaflet map, plotly)

library(htmlwidgets)
saveWidget(map, file = "ma_carte.html")

R Markdown

Footer, CSS, header files in other folder

includes:
    in_header: !expr here::here("inst/rmarkdown/resources/header.html")

Logo / image at top of the document

# Logo haut de page
htmltools::img(src = "lien/vers/mon/image", 
               alt = 'logo', 
               style = 'position:absolute; top:0; right:0; width:400px') #width pour la taille! (ici positionné en haut à droite)

Output file into ‘reports’ folder

# Mettre dans le header du document RMD
knit: (
  function(inputFile, encoding) { 
    rmarkdown::render(inputFile, params = "ask",  
      encoding    = encoding,
      output_dir = "../reports", 
      output_file = paste0(tools::file_path_sans_ext(inputFile), ".html")) })

Justify text

# Mettre en corps de texte du RMD ou dans un fichier CSS à part
<style>
body {
text-align: justify
}
</style>

Change font, color and indent text

# Mettre en corps de texte du RMD 
<p style="margin-left: 20px; font-size: 2em; color: #304B95;">**Simulateur**</p>

Custom dfsummary

print(dfSummary(data_summary, style = "grid", graph.magnif = 1, 
                valid.col = FALSE, varnumbers = FALSE, tmp.img.dir = "/tmp", 
                max.distinct.values = 5, headings = FALSE, method = "render", 
                col.widths  = c(300, 200, 100, 50, 20)),
      max.tbl.height = 600,
      method = "render")

Use installed font family

library(showtext)
font_add("Nexa", regular = "Nexa Bold.otf")
font_add("Trade Gothic", regular = "Trade Gothic.otf")
showtext_auto()
# then specify on the CSS the name of the font

TOC

output:
    rmarkdown::html_document:
      toc: true
      toc_float: true
      toc_depth: 2
      number_sections: true
# {-} après certains titres si on veut enlever le numérotage automatique pour ceux-là

Python on Rmd

# En début de Rmd
library(reticulate)

# Première utilisation
py_install("pandas") #pandas
pip install plotly #plotly, à runer dans le terminal

# Pour utiliser un environnement virtuel
Sys.setenv(RETICULATE_PYTHON = "path/to/python.exe") 
virtualenv_create("test_proj")
py_install("pandas", envname = "test_proj", method = "auto")
use_virtualenv("test_proj")

#```{python}
import pandas
import plotly.express as px
matrice = [[43, 57], [12, 88]]
fig = px.imshow(matrice)
fig.show()
#```

Remove blank space end RMD file

<div class = "tocify-extend-page" data-unique = "tocify-extend-page" style = "height: 0;"></div>

RMD collaboratif

# Déposer Rmd sur GDrive pour travailler en collaboration   
    # LE METTRE DANS UN DOSSIER TRACKDOWN ET LE NOM EN LIGNE DOIT GARDER L'EXTENSION .RMD
trackdown::upload_file(file = "scripts/Rapport_final.Rmd", gfile = "Rapport_final.Rmd")
trackdown::download_file(file = "scripts/Rapport_final.Rmd", gfile = "Rapport_final.Rmd")

Insert and center image in Rmd page

<p align="center">
 <img src="../figures/graphique.png" width = "110">
</p>
    #ou
![](../figures/graphique.png){fig-align="center"} #en corps de texte

Embed HTML in Rmd page

Code à mettre en corps de texte pour que ça run :

# Embed centré, html en ligne (ex: carte ODS)
<div align="center">
<iframe frameborder="0" width="800" height="600" src="lien/vers/mon/graphique"></iframe>
</div>

# HTML en local
<iframe src="Save_cartos/carte_p17_t1.html" height="600" width="1000" style="border: 0px solid #464646;" allowfullscreen="" allow="autoplay" data-external="1"></iframe>

Attention bien mettre “self_contained: false” dans le header. Ouvrir dans un browser pour voir le résultat.

Autre solution :

knitr::include_url(glue('../figures/tableau_global/tableau_global_{name_salarie}.html'))

Mettre ça dans <style> en corps de texte du RMD pour enlever la bordure noire.

iframe {
  border: none;
}

Documentation

Pimp my rmd, Yan Holtz

Git

Cancel last commit NOT pushed

git reset HEAD~1

Cancel last commit pushed

git revert HEAD

Merge diverging branches

git config pull.rebase false

Stash local changes for specific file

git stash push
git stash push scripts/tableau_de_bord.html #specific file

git stash push data/indicateurs_Sarah/CRM_odoo_dashboard.csv
git stash push data/indicateurs_Sarah/Suivi_budget_previsionnel.html
git stash push data/indicateurs_Sarah/table_budget_previsionnel.csv
git stash push data/indicateurs_Sarah/table_tjh.csv

Stash local changes for all files

git stash # annuler le dernier commit
git stash push --include-untracked # supprimer tous les changements locaux (pull possible ensuite)

Quand message “needs merge” :

sélectionner les fichiers en conflit (ex : data/table_tjh.csv)
les commit
push

Github actions

Install package from github (not from CRAN)

Télécharger le repo contenant le package via l’URL : https://github.com/account/reponame/archive/branchname.tar.gz
Le sauvegarder dans un dossier du repo de travail actuel
L’installer via cette commande à ajouter au .yaml de l’action (name: install icons package, sans oublier d’installer le package remotes)

# Intégrer dans .github/workflows/render-document.yaml
      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          cache-version: 2
          packages:
            any::knitr
            any::tidyverse
            any::flexdashboard
            any::remotes
            
      - name: install icons package
        run: |
          Rscript -e "remotes::install_url('https://raw.githubusercontent.com/datactivist/plans_de_charge/main/.github/workflows/master.tar.gz')"

Divers

Bash commands in R

system(glue("cat {in_dir}/raw_data_{year}.jsonl | jq -c '{{doi, year, bso_classification, hal_id}}' | jq --slurp > {out_dir}/unnested_data_{year}.json"))

Parallel execution command

library(parallel)
library(doParallel)
library(foreach)
years <- 2013:2020

numCores <- 2
registerDoParallel(numCores)

foreach (year = years) %dopar% {
  ma_fonction(year)
}

stopImplicitCluster()

Execution time of a command

start.time <- Sys.time()
# R code here
end.time <- Sys.time()
round(end.time - start.time,2)

Icônes

Icons pages

https://fontawesome.com/search?o=r&m=free

https://ionic.io/ionicons

https://jpswalsh.github.io/academicons/

Icons into rmarkdwon

, meilleure solution (bibliothèque d’icones).

#`r icon_style(emojifont::fontawesome("play", style = NULL), fill = "#0000CC")`, implique ce chunk en début de Rmd ([bibliothèque d'icones](https://fontawesome.com/v4/icons/)) :


#remotes::install_github("mitchelloharawild/icons", force = TRUE)
library(icons)
#download_fontawesome()
library(extrafont)

Illustrations libres de droits

https://cocomaterial.com/

Fonctions créées

Export object created within function

assign(glue("n_{year}"), n, envir = .GlobalEnv)

Detach created object from environment

rm(ls = inter_bv, ratio, area_commune, area_2017)

Possibly function treatment

tryCatch(ma_fonction(data), error = function(e) NULL)

Use function saved in other directory

source(here("functions", "match_commune.R"))
object <- memoise::memoise(match_commune, cache = memoise::cache_filesystem(here("cache")))

Call variable within created function

fonction <- function(data, variable){
    data |> filter({{variable}} == 2)
}

Rename column wihtin created function

fonction <- function(new_cols, cols){
    data |> rename({{ new_cols }} := {{ cols }}) 
}

Huge utilisation created function

library(purrr)
purrr::map(.x = c(13:16, 18, 26:38, 40:44, 46, 49, 55, 66:68, 70, 84:90, 93:96, 99:108, 111:113, 115, 139, 162:178, 180, 181, 188:197),
          .f = ~table_recap_simple(.x))
table_recap_stat <- rbind(lapply(ls(pattern="^all_stat_"), function(x) get(x))) |> 
    bind_rows()

Create column if it does not already exist

datat <- data |> mutate(`NA` = ifelse("NA" %in% names(data), `NA`, "0%"))

“Error in auto_copy(): ! x and y must share the same src”

Dans une fonction créée, lorsqu’une base de données est appelée sans être mise comme argument attendu, il faut l’ajouter comme argument de fonction.

Get statistics within function

inter_categories <- function(data, variable){
    # Calcul
    min_val <- min(data[[variable]], na.rm = TRUE) #data[[variable]] pour accéder à la variable dans un df
    q1 <- quantile(data[[variable]], 0.25, na.rm = TRUE)
    med <- median(data[[variable]], na.rm = TRUE)
    q3 <- quantile(data[[variable]], 0.75, na.rm = TRUE)
    max_val <- max(data[[variable]], na.rm = TRUE)
    
    # Assignation à l'environnement global
    assign("min_val", min_val, envir = .GlobalEnv) #nom d'objet "min_val" et pas "min" pour pas crééer de conflit avec le nom de fonction
    assign("q1", q1, envir = .GlobalEnv)
    assign("med", med, envir = .GlobalEnv)
    assign("q3", q3, envir = .GlobalEnv)
    assign("max_val", max_val, envir = .GlobalEnv)
}
inter_categories(table, "Densité (habitant par m²)") #nom de variable entre guillemets et pas backticks sinon est considéré comme objet à part et non une variable du jeu de données

Automatic tab for elements of a list in a flexdashboard document

# voir script ici [repo privé] : https://github.com/datactivist/plans_de_charge/blob/main/scripts/tableau_de_bord.Rmd
# ATTENTION : ne pas mettre de caractères spéciaux (&.) dans les noms data-navmenu=""

Document sous licence ouverte réalisé par Diane Thierry

diane@datactivist.coop

Document sous licence Creative Commons

Commandes utiles

Mis à jour le 21 mars 2025