library(fakir)
library(dplyr)
library(ggplot2)
library(sf)
This dataset simulates an after-sale client database for a phone company. It contains:

- a client database with all the characteristics of the clients;
- a ticket database containing all calls made to the after-sale service by clients who had problems.

Ticket-centered dataset with client characteristics already joined:
# Generate a small sample with vol = 10; the tibble printed below has
# 10 rows and 25 columns.
fake_ticket_client(vol = 10)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
#> # A tibble: 10 × 25
#> ref num_client first last job age region id_dpt departement
#> <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
#> 1 DOSS-AMQN-002 79 Jovan O'Ke… Gene… 22 Île-d… 77 <NA>
#> 2 DOSS-NCKJ-010 69 Miss Lean… Emer… 68 <NA> 25 Doubs
#> 3 DOSS-GPBE-009 120 Odell Stok… Engi… 24 <NA> 17 Charente-M…
#> 4 DOSS-GRLN-001 31 Loren Lars… <NA> NA <NA> 33 <NA>
#> 5 DOSS-LEPJ-004 59 Maybelle Maye… Furt… 18 <NA> 33 <NA>
#> 6 DOSS-DUCL-005 118 Jamarion Ober… Engi… 18 Langu… 48 <NA>
#> 7 DOSS-OCED-003 77 Lee Scha… Admi… NA Poito… 17 Charente-M…
#> 8 DOSS-KXSJ-007 65 Demetric Auer Cont… 21 Pays … 49 <NA>
#> 9 DOSS-UITD-006 141 Wilfrid Harv… Educ… 53 <NA> 81 Tarn
#> 10 DOSS-SHKL-008 182 Addyson Nien… Earl… 65 Poito… 17 Charente-M…
#> # ℹ 16 more variables: cb_provider <chr>, name <chr>, entry_date <dttm>,
#> # fidelity_points <dbl>, priority_encoded <dbl>, priority <fct>,
#> # timestamp <date>, year <dbl>, month <dbl>, day <int>, supported <chr>,
#> # supported_encoded <int>, type <chr>, type_encoded <int>, state <fct>,
#> # source_call <fct>
# With split = TRUE the result is a list holding two separate tibbles,
# `clients` and `tickets` (see the printed output below). It is stored in
# `tickets_db` so the plots further down can reuse it.
tickets_db <- fake_ticket_client(vol = 100, split = TRUE)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
tickets_db
#> $clients
#> # A tibble: 200 × 14
#> num_client first last job age region id_dpt departement cb_provider
#> * <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
#> 1 1 Solomon Heaney Civi… 53 Champ… 51 Marne Diners Clu…
#> 2 2 Karma William… Scie… 81 Auver… 63 Puy-de-Dôme VISA 13 di…
#> 3 3 Press Kulas Anim… NA Prove… 06 Alpes-Mari… <NA>
#> 4 4 Laken McDermo… <NA> NA Breta… 56 Morbihan <NA>
#> 5 5 Sydnie Jaskols… Hort… 30 Centre 36 <NA> <NA>
#> 6 6 Clayton Runolfs… Comm… NA Prove… 04 <NA> Diners Clu…
#> 7 7 Roberta Purdy-W… Fina… 60 Île-d… 91 Essonne <NA>
#> 8 8 Dr. RonaldM… Astr… 30 Rhône… 42 Loire <NA>
#> 9 9 Miss Alondra… Occu… 18 Aquit… 24 Dordogne Diners Clu…
#> 10 10 Vernice Ondrick… Clin… 19 Limou… 87 Haute-Vien… <NA>
#> # ℹ 190 more rows
#> # ℹ 5 more variables: name <chr>, entry_date <dttm>, fidelity_points <dbl>,
#> # priority_encoded <dbl>, priority <fct>
#>
#> $tickets
#> # A tibble: 100 × 10
#> ref num_client year month day timestamp supported type state
#> <chr> <chr> <dbl> <dbl> <int> <date> <chr> <chr> <fct>
#> 1 DOSS-GFEL-0028 1 2016 12 21 2016-12-21 Non Insta… Term…
#> 2 DOSS-UWYV-0016 22 2020 10 12 2020-10-12 Non Insta… Atte…
#> 3 DOSS-DKFC-0073 9 2020 11 16 2020-11-16 Non Insta… Term…
#> 4 DOSS-SAYJ-0047 8 2020 12 1 2020-12-01 Non Box Atte…
#> 5 DOSS-GSMZ-0080 30 2020 12 18 2020-12-18 Oui Insta… Inte…
#> 6 DOSS-UIOZ-0085 10 2020 12 30 2020-12-30 Oui Insta… Atte…
#> 7 DOSS-DSMI-0065 37 2021 1 27 2021-01-27 Non Ligne Atte…
#> 8 DOSS-JOYV-0029 37 2021 3 19 2021-03-19 Non Box Atte…
#> 9 DOSS-WPSG-0013 24 2021 3 26 2021-03-26 Non <NA> En c…
#> 10 DOSS-NHFG-0036 12 2021 4 12 2021-04-12 Non Insta… Atte…
#> # ℹ 90 more rows
#> # ℹ 1 more variable: source_call <fct>
# Scatter plot of fidelity points against client entry date, with a
# smoothed trend line drawn on top of the points.
ggplot(
  data = tickets_db$clients,
  mapping = aes(x = entry_date, y = fidelity_points)
) +
  geom_point() +
  geom_smooth()
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Bar chart: number of tickets for each value of `type`.
ggplot(data = tickets_db$tickets, mapping = aes(x = type)) +
  geom_bar()
# Bar chart: number of tickets for each value of `state`.
ggplot(data = tickets_db$tickets, mapping = aes(x = state)) +
  geom_bar()
# Join with the `fra_sf` spatial dataset. The {sf} package must be loaded.
#
# Clients are first aggregated per `id_dpt` (client count and mean fidelity
# points, ignoring missing values), then joined to `fra_sf` on `id_dpt`.
# A full join keeps rows from both sides even when the other side has no
# match. `st_sf()` converts the joined table back into an sf object so that
# `geom_sf()` can draw it.
clients_map <- tickets_db$clients %>%
  group_by(id_dpt) %>%
  summarise(
    number_of_clients = n(),
    average_fidelity = mean(fidelity_points, na.rm = TRUE)
  ) %>%
  full_join(fra_sf, by = "id_dpt") %>%
  st_sf()
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
# Map of `clients_map`, each geometry filled by its `average_fidelity`
# value on a continuous viridis scale.
# NOTE(review): `crs` and `datum` are passed as bare numeric codes,
# presumably EPSG identifiers — confirm against the coord_sf() documentation.
ggplot(data = clients_map) +
  geom_sf(mapping = aes(fill = average_fidelity)) +
  scale_fill_viridis_c() +
  coord_sf(crs = 2154, datum = 4326)
# Generate 10 fake products and tally how many fall into each category.
fake_products(10) %>%
  count(category)
#> # A tibble: 7 × 2
#> category n
#> <chr> <int>
#> 1 Awesome 1
#> 2 Entertainment 1
#> 3 Fitness 1
#> 4 Industrial 1
#> 5 Lifestyle 3
#> 6 Medical 2
#> 7 Pets and Animals 1
# Fake visit counts between two dates; the printed result has one row per
# day (31 rows for 2017-01-01 through 2017-01-31).
fake_visits(from = "2017-01-01", to = "2017-01-31")
#> # A tibble: 31 × 8
#> timestamp year month day home about blog contact
#> * <date> <dbl> <dbl> <int> <int> <int> <int> <int>
#> 1 2017-01-01 2017 1 1 369 220 404 210
#> 2 2017-01-02 2017 1 2 159 250 414 490
#> 3 2017-01-03 2017 1 3 436 170 498 456
#> 4 2017-01-04 2017 1 4 NA 258 526 392
#> 5 2017-01-05 2017 1 5 362 NA 407 291
#> 6 2017-01-06 2017 1 6 245 145 576 90
#> 7 2017-01-07 2017 1 7 NA NA 484 167
#> 8 2017-01-08 2017 1 8 461 103 441 NA
#> 9 2017-01-09 2017 1 9 337 113 673 379
#> 10 2017-01-10 2017 1 10 NA 169 308 139
#> # ℹ 21 more rows
# Without `split`, individual and answer columns come back in one tibble.
# With n = 10 the printed result has 30 rows; the visible sample shows
# three rows per `id_individu`.
fake_survey_answers(n = 10)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
#> # A tibble: 30 × 12
#> id_individu age sexe region id_departement nom_departement
#> <chr> <int> <chr> <chr> <chr> <chr>
#> 1 ID-NYDZ-010 NA <NA> <NA> 55 <NA>
#> 2 ID-NYDZ-010 NA <NA> <NA> 55 <NA>
#> 3 ID-NYDZ-010 NA <NA> <NA> 55 <NA>
#> 4 ID-PWLB-009 71 F Rhône-Alpes 38 Isère
#> 5 ID-PWLB-009 71 F Rhône-Alpes 38 Isère
#> 6 ID-PWLB-009 71 F Rhône-Alpes 38 Isère
#> 7 ID-NMQG-001 42 M Midi-Pyrénées 82 Tarn-et-Garonne
#> 8 ID-NMQG-001 42 M Midi-Pyrénées 82 Tarn-et-Garonne
#> 9 ID-NMQG-001 42 M Midi-Pyrénées 82 Tarn-et-Garonne
#> 10 ID-RJXN-002 71 O <NA> 17 Charente-Maritime
#> # ℹ 20 more rows
#> # ℹ 6 more variables: question_date <dttm>, year <dbl>, type <chr>,
#> # distance_km <dbl>, transport <fct>, temps_trajet_en_heures <dbl>
# With split = TRUE the result is a list of two tibbles, `individus` and
# `answers`, which share the `id_individu` column (see the output below).
fake_survey_answers(n = 10, split = TRUE)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
#> $individus
#> # A tibble: 10 × 8
#> id_individu age sexe region id_departement nom_departement
#> <chr> <int> <chr> <chr> <chr> <chr>
#> 1 ID-NYDZ-010 NA <NA> Basse-Normandie 14 Calvados
#> 2 ID-PWLB-009 71 F Corse 2A Corse-du-Sud
#> 3 ID-NMQG-001 42 M <NA> 68 Haut-Rhin
#> 4 ID-RJXN-002 71 O Rhône-Alpes 01 Ain
#> 5 ID-MROK-007 41 M Basse-Normandie 14 Calvados
#> 6 ID-VMKS-004 33 O Lorraine 54 Meurthe-et-Mos…
#> 7 ID-XEMZ-003 81 O Provence-Alpes-Côte d… 84 Vaucluse
#> 8 ID-EUDQ-005 44 M Champagne-Ardenne 10 <NA>
#> 9 ID-DCIZ-008 92 O Aquitaine 64 Pyrénées-Atlan…
#> 10 ID-KPUS-006 57 O <NA> 54 Meurthe-et-Mos…
#> # ℹ 2 more variables: question_date <dttm>, year <dbl>
#>
#> $answers
#> # A tibble: 30 × 5
#> id_individu type distance_km transport temps_trajet_en_heures
#> <chr> <chr> <dbl> <fct> <dbl>
#> 1 ID-NYDZ-010 travail 12.2 voiture 0.15
#> 2 ID-NYDZ-010 commerces 9.61 bus 1.01
#> 3 ID-NYDZ-010 loisirs 549. avion 0.27
#> 4 ID-PWLB-009 travail 11.9 voiture 0.14
#> 5 ID-PWLB-009 commerces 27.4 voiture 0.34
#> 6 ID-PWLB-009 loisirs 210. train 0.42
#> 7 ID-NMQG-001 travail 2.38 velo 0.43
#> 8 ID-NMQG-001 commerces 14.9 voiture 0.18
#> 9 ID-NMQG-001 loisirs 446. train 0.89
#> 10 ID-RJXN-002 travail 6.18 mobylette 0.75
#> # ℹ 20 more rows
# Generate a larger joined survey dataset with n = 30 (90 rows in the
# printed output) and store it in `answers` for the plot below.
answers <- fake_survey_answers(n = 30)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
answers
#> # A tibble: 90 × 12
#> id_individu age sexe region id_departement nom_departement
#> <chr> <int> <chr> <chr> <chr> <chr>
#> 1 ID-MROK-007 NA M Nord-Pas-de-Calais 62 Pas-de-Calais
#> 2 ID-MROK-007 NA M Nord-Pas-de-Calais 62 Pas-de-Calais
#> 3 ID-MROK-007 NA M Nord-Pas-de-Calais 62 Pas-de-Calais
#> 4 ID-NYDZ-010 49 M Midi-Pyrénées 82 Tarn-et-Garonne
#> 5 ID-NYDZ-010 49 M Midi-Pyrénées 82 Tarn-et-Garonne
#> 6 ID-NYDZ-010 49 M Midi-Pyrénées 82 Tarn-et-Garonne
#> 7 ID-HXOG-015 50 M Bourgogne 71 <NA>
#> 8 ID-HXOG-015 50 M Bourgogne 71 <NA>
#> 9 ID-HXOG-015 50 M Bourgogne 71 <NA>
#> 10 ID-MZNB-024 70 F Aquitaine 47 Lot-et-Garonne
#> # ℹ 80 more rows
#> # ℹ 6 more variables: question_date <dttm>, year <dbl>, type <chr>,
#> # distance_km <dbl>, transport <fct>, temps_trajet_en_heures <dbl>
# Log of travel distance against age, coloured by `type` and drawn as one
# facet per `type`. Each facet gets its own y-axis range ("free_y").
ggplot(
  data = answers,
  mapping = aes(x = age, y = log(distance_km), colour = type)
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~type, scales = "free_y")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 6 rows containing non-finite values (`stat_smooth()`).
#> Warning: Removed 6 rows containing missing values (`geom_point()`).