pacman::p_load(jsonlite,tidygraph, ggraph, visNetwork, tidyverse)
# In-class Exercise 5
# Part 1 - Explore JSON
# - Load packages
# - Read the JSON file
# Read the MC1 JSON file into R.
MC1 <- jsonlite::fromJSON("D:/MITB/ISSS608/fruitpunchsamurai666/project/MC1/data/MC1.json")
# - Read the node and edge data frames from the JSON
# Pull the sub-data frames out of the parsed JSON, then select and order the
# columns that are needed.
MC1_nodes <- as_tibble(MC1$nodes) %>%
  select(id, type, country)
MC1_edges <- as_tibble(MC1$links) %>%
  select(source, target, type, weight, key)
# Part 2 - Network Visualization
# - Load packages
pacman::p_load(igraph, tidygraph, ggraph,
visNetwork, lubridate, clock,
tidyverse, graphlayouts)
# - Read data
# Load the GAStech email node and edge tables from the local data folder.
GAStech_nodes <- read_csv("data/GAStech_email_node.csv")
GAStech_edges <- read_csv("data/GAStech_email_edge-v2.csv")

# Inspect the columns and types of the edge table.
glimpse(GAStech_edges)
# Recorded console output:
#> Rows: 9,063
#> Columns: 8
#> $ source <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 26, 26, 26…
#> $ target <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, 27, 28, 29…
#> $ SentDate <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014"…
#> $ SentTime <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:00, 08:58:0…
#> $ Subject <chr> "GT-SeismicProcessorPro Bug Report", "GT-SeismicProcessorP…
#> $ MainSubject <chr> "Work related", "Work related", "Work related", "Work rela…
#> $ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "Kanon.Herr…
#> $ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "Hideki.Coc…
# - Wrangling time
# Derive date fields from the raw `SentDate` text column.
#   SendDate: `SentDate` parsed as day-month-year by dmy().
#   Weekday : full weekday name (ordered factor) of `SendDate`.
# The weekday must be taken from the parsed `SendDate`. Handing the raw
# character column to wday() leaves the string-to-date coercion to wday()
# itself, which does not know the text is day-first and so yields wrong
# weekdays.
# NOTE(review): output recorded before this fix (e.g. weekend days for
# work-related emails) reflects the old behaviour and should be regenerated.
GAStech_edges <- GAStech_edges %>%
  mutate(SendDate = dmy(SentDate)) %>%
  mutate(Weekday = wday(SendDate,
                        label = TRUE,
                        abbr = FALSE))
# - Wrangle attributes: group by source-target-day, then remove emails that are
#   cc'd to the sender or that were sent only once.
# Aggregate work-related emails into weighted edges: one row per
# (source, target, Weekday) with Weight = number of emails in that group.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  # Drop emails whose sender is also the recipient, then groups seen only once.
  filter(source != target) %>%
  filter(Weight > 1) %>%
  # ungroup() (dplyr) removes the grouping from a grouped data frame. While
  # data is grouped, operations are applied to each group separately;
  # ungrouping makes later operations apply to the data frame as a whole.
  ungroup()
# - Build the tidygraph data model and arrange the rows by weight in descending
#   order
# Build a directed tidygraph object from the node table and the aggregated
# edge table.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Print the graph with the edge table active and sorted by descending Weight.
# The result is only printed; GAStech_graph itself is not modified.
GAStech_graph %>%
  activate(edges) %>%
  arrange(desc(Weight))
# Recorded console output:
#> # A tbl_graph: 54 nodes and 1372 edges
#> #
#> # A directed multigraph with 1 component
#> #
#> # A tibble: 1,372 × 4
#> from to Weekday Weight
#> <int> <int> <ord> <int>
#> 1 40 41 Saturday 13
#> 2 41 43 Monday 11
#> 3 35 31 Tuesday 10
#> 4 40 41 Monday 10
#> 5 40 43 Monday 10
#> 6 36 32 Sunday 9
#> # ℹ 1,366 more rows
#> #
#> # A tibble: 54 × 4
#> id label Department Title
#> <dbl> <chr> <chr> <chr>
#> 1 1 Mat.Bramar Administration Assistant to CEO
#> 2 2 Anda.Ribera Administration Assistant to CFO
#> 3 3 Rachel.Pantanal Administration Assistant to CIO
#> # ℹ 51 more rows
# - Plot the graph without x and y axes using the Fruchterman-Reingold layout
# Draw the email network with the Fruchterman-Reingold ("fr") layout.
# Edge width is mapped to Weight and node colour to Department.
# The point size is a constant, so it is set outside aes(): inside aes() the
# value 3 would be treated as data, adding a meaningless "size" legend and
# letting the size scale (not the literal 3) decide how big the points are.
g <- ggraph(GAStech_graph, layout = "fr") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.2) +
  geom_node_point(aes(colour = Department),
                  size = 3)

# theme_graph() removes the axes, grid and background.
g + theme_graph()
# Working with facets (edges as an example)
# Apply ggraph's graph theme defaults to the plots that follow.
set_graph_style()

# Base network plot: edge width mapped to Weight (rescaled to 0.1-5),
# node colour mapped to Department.
g <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link(aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(aes(colour = Department), size = 2)

# One panel per Weekday: facet_edges() splits the edges across the panels.
# th_foreground() adds a grey foreground with a border to each panel.
g + facet_edges(~Weekday) +
  th_foreground(foreground = "grey80", border = TRUE) +
  theme(legend.position = 'bottom')
# Interactive network graph with visNetwork
# Rebuild the aggregated edge list with `from`/`to` columns for visNetwork.
# Sender and recipient labels are looked up in the node table to obtain their
# ids; work-related emails are then counted per (from, to) pair, keeping only
# pairs with distinct endpoints and more than one email.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(from != to, weight > 1) %>%
  ungroup()
# visNetwork shades the nodes by assigning a unique colour to each category in
# the `group` field, so expose Department under that column name.
GAStech_nodes <- GAStech_nodes %>%
  rename(group = Department)

# Interactive network widget laid out with igraph's Fruchterman-Reingold
# algorithm.
visNetwork(GAStech_nodes,
           GAStech_edges_aggregated) %>%
  # The node coordinates are computed by visIgraphLayout(), which takes its own
  # `randomSeed`; seeding it here keeps the layout the same between runs.
  visIgraphLayout(layout = "layout_with_fr",
                  randomSeed = 123) %>%
  # Highlight a node's neighbours on click and add a drop-down to pick a node.
  visOptions(highlightNearest = TRUE,
             nodesIdSelection = TRUE) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)