generated from rpodcast/r_dev_projects
-
Notifications
You must be signed in to change notification settings - Fork 0
/
explore.R
75 lines (61 loc) · 1.94 KB
/
explore.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Explore data sets
library(dplyr)
library(tidyr)
library(purrr)
library(reactable)
library(reactablefmtr)
library(htmltools)
library(pointblank)
library(anytime)
source("quarto_dashboard/R/utils.R")
source("quarto_dashboard/R/fct_tables.R")
# Fetch the objects explored below from their remote export URLs.
# The podcastdb_*_object() helpers are not defined in this script
# (presumably they come from the files sourced above).
pod_dup_df <- podcastdb_dupdf_object(
  "https://podcast20-projects.us-east-1.linodeobjects.com/exports/podcast_dup_df.rds"
)

pod_an_df <- podcastdb_analysisdf_object(
  "https://podcast20-projects.us-east-1.linodeobjects.com/exports/analysis_metrics_df.rds"
)

pointblank_object <- podcastdb_pointblank_object(
  "https://podcast20-projects.us-east-1.linodeobjects.com/exports/podcastdb_pointblank_object/podcastdb_pointblank_object"
)
# Collect the data extracts held by pointblank_object.
pb_extracts <- get_data_extracts(pointblank_object)

# Keep only the extracts named "1", "3", "4" and "7".
pb_extracts_sub <- pb_extracts[c("1", "3", "4", "7")]

# Run clean_podcast_df() over each retained extract, showing a progress bar.
pb_extracts_sub_clean <- purrr::map(
  pb_extracts_sub,
  \(extract) clean_podcast_df(extract),
  .progress = TRUE
)
# Keep a handle on the validation_set element of pointblank_object.
val_set <- pointblank_object$validation_set

# Fetch the log object for 2024-03-11 and pass it through date_report().
log_list <- date_report(
  podcastdb_log_object(
    root_url = "https://podcast20-projects.us-east-1.linodeobjects.com/logs/",
    date = "2024-03-11"
  )
)
#saveRDS(pod_dup_df, file = "dbfiles/pod_dup_df.rds")
#saveRDS(pod_an_df, file = "dbfiles/pod_an_df.rds")
# Render the detail table for the row(s) of pod_dup_df whose id is 792.
# Interactive alternative: View(filter(pod_dup_df, id == 792))
record_detail_table(
  filter(pod_dup_df, id == 792)
)
# Placeholder image substituted for records without a usable cover art URL.
no_cover_art_url <- "https://podcastindex.org/images/no-cover-art.png"

# Replace unusable imageUrl values with the placeholder. A value is treated
# as unusable when it is NA or shorter than 29 characters; the length test
# also covers the empty string, so no separate `== ""` branch is needed.
# NOTE(review): the rationale for the 29-character cut-off is not documented
# in this script -- confirm it is still the intended threshold.
df <- pod_dup_df |>
  mutate(
    imageUrl = case_when(
      # is.na() is checked explicitly: comparisons against NA yield NA, which
      # case_when() treats as "no match", so NA values would otherwise fall
      # through to .default and stay NA.
      is.na(imageUrl) | stringr::str_length(imageUrl) < 29 ~ no_cover_art_url,
      .default = imageUrl
    )
  )

# Sanity check: list any rows whose imageUrl still does not start with an
# http:// or https:// scheme. The pattern is anchored so that a value merely
# containing "http" somewhere in the middle is still reported.
df |>
  filter(!grepl("^https?://", imageUrl))
# Tidy the pub_timespan_days_list column in two sequential steps (mutate()
# evaluates its arguments in order, so step 2 sees the result of step 1):
#   1. purrr::flatten() removes one level of nesting from the column;
#   2. within each element, NA entries are replaced by 0L.
# NOTE(review): purrr::flatten() is superseded as of purrr 1.0.0; its
# replacement list_flatten() treats atomic elements differently, so verify
# the column's shape before swapping it in.
pod_an_df <- mutate(
  pod_an_df,
  pub_timespan_days_list = purrr::flatten(pub_timespan_days_list),
  pub_timespan_days_list = purrr::map(
    pub_timespan_days_list,
    \(days) dplyr::coalesce(days, 0L)
  )
)
# Narrow the analysis data to the two columns shown in the table.
df_sub <- pod_an_df |>
  select(record_group, pub_timespan_days_list)

# Display the subset as a reactable, drawing pub_timespan_days_list as a
# sparkline in each row.
df_sub |>
  reactable(
    columns = list(
      pub_timespan_days_list = colDef(cell = react_sparkline(df_sub))
    )
  )