-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path08_comoving_analysis_AA rewrite2.R
executable file
·417 lines (303 loc) · 18.3 KB
/
08_comoving_analysis_AA rewrite2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
library( stringr )
library( ggplot2 )
library( hms )
library( dplyr )
## fcn to provide a unique name to each unique group dyad
dy_name <- function(vec) {
temp <- sort( c( vec[ 'group1' ], vec[ 'group2' ] ) )
return( paste( temp[1], temp[2], sep = '-') )
}
# SET UP -------------------------------------------------------------------------------------------
### user-inputs ###
SUBSET <- TRUE
dist_thresh <- 600
quant <- 0.80 ## quantile of intra-group dyadic distances to determine the maximum allowable
samp_int <- 15 ## sampling interval -- this should stay at 15
min_move <- 100 ## minimum distance required to move while being within the distance set by the quant variable above to be considered a co-move
min_comove_duration <- 0 ## minimum duration required remain iwthin the distance set by the quant variable above to be considered a co-move
# median departure time and arrival time at the sleep site, respectively. These are determined in a previous script (but could be determined here instead of hard-coding before publishing)
median_leave_time <- "08:30:00"
median_arrive_time <- "17:30:00"
# bring in data
if(SUBSET){spec_df <- read.csv("bab_dyad_subset.csv")}else{spec_df <- read.csv("bab_dyad.csv")}
head( spec_df )
spec_df$local_timestamp <- as.POSIXct( spec_df$local_timestamp, tz = 'UTC' )
spec_df$date <- as.Date( spec_df$local_timestamp )
spec_df$dy_name <- apply( spec_df[ , c( 'group1', 'group2' ) ], 1, function( x ) paste( sort( x ), collapse = '_' ) )
#now duplicate the main df but take out overnight data
str(spec_df)
spec_df$time_only <- as_hms(spec_df$local_timestamp)
spec_df_day <- droplevels(subset(spec_df, time_only <= as_hms("17:30:00") & time_only >= as_hms("08:30:00")))
### find typical intragroup dists ###
groupmate_df_day <- spec_df_day[ spec_df_day$group1 == spec_df_day$group2 & spec_df_day$id1 != spec_df_day$id2, ]
groupmate_df_day <- groupmate_df_day[ !duplicated( groupmate_df_day$local_timestamp ), ]
hist( groupmate_df_day$dyadDist )
group_spread_dist_day <- quantile( groupmate_df_day$dyadDist, quant, na.rm = T )
group_spread_dist_day #84.85
abline( v = group_spread_dist_day, col = 'red' )
# By DAY --------------------------------------------------------------------------------------------
#### First, determine if 2 groups shared a sleep site the night before or the next night, use all data
daily_df <- unique( spec_df[ , c( 'group1', 'group2', 'date' ) ] )
daily_df <- daily_df[ daily_df$group1 != daily_df$group2, ]
daily_df$dy_name <- apply( daily_df[ , c( 'group1', 'group2' ) ], 1, function( x ) paste( sort( x ), collapse = '-' ) )
rownames( daily_df ) <- NULL
daily_df <- daily_df[ ! duplicated( daily_df[ , c( 'dy_name', 'date' ) ] ), ]
daily_df$cosleep_last_night <- NA
daily_df$cosleep_tonight <- NA
for( i in 1:nrow( daily_df ) ){
# check whether the two groups sleep together on the previous night
sleep_site_1 <- unique( spec_df[ spec_df$group1 == daily_df$group1[ i ] & spec_df$date == as.Date( daily_df$date[ i ] - 1, origin = '1970-01-01' ), 'sleep_clus' ] )
sleep_site_1 <- sleep_site_1[ !is.na( sleep_site_1 ) ]
sleep_site_2 <- unique( spec_df[ spec_df$group1 == daily_df$group2[ i ] & spec_df$date == as.Date( daily_df$date[ i ] - 1, origin = '1970-01-01' ), 'sleep_clus' ] )
sleep_site_2 <- sleep_site_2[ !is.na( sleep_site_2 ) ]
if( length( sleep_site_1 ) > 1 | length( sleep_site_2 ) > 1 ) stop( 'more than one sleep site for this day' )
## if we know where both groups slept on the previous night
if( length( sleep_site_1 ) == 1 & length( sleep_site_2 ) == 1 ){
# add whether they slept at the same site or not as a binary variable to the dataframe
daily_df$cosleep_last_night[ i ] <- as.numeric( sleep_site_1 == sleep_site_2 )
}
# same for current night
sleep_site_1 <- unique( spec_df[ spec_df$group1 == daily_df$group1[ i ] & spec_df$date == as.Date( daily_df$date[ i ], origin = '1970-01-01' ), 'sleep_clus' ] )
sleep_site_1 <- sleep_site_1[ !is.na( sleep_site_1 ) ]
sleep_site_2 <- unique( spec_df[ spec_df$group1 == daily_df$group2[ i ] & spec_df$date == as.Date( daily_df$date[ i ], origin = '1970-01-01' ), 'sleep_clus' ] )
sleep_site_2 <- sleep_site_2[ !is.na( sleep_site_2 ) ]
if( length( sleep_site_1 ) > 1 | length( sleep_site_2 ) > 1 ) stop( 'more than one sleep site for this day' )
## if we know where both groups slept on the current night
if( length( sleep_site_1 ) == 1 & length( sleep_site_2 ) == 1 ){
# add whether they slept at the same site or not as a binary variable to the dataframe
daily_df$cosleep_tonight[ i ] <- as.numeric( sleep_site_1 == sleep_site_2 )
}
}
# NA value means missing data for one or both groups
# 0 means no share
# 1 means shared
#get summary stats
sum( daily_df$cosleep_last_night == 0, na.rm = T) #this sum hasn't changed
sum( daily_df$cosleep_last_night == 1, na.rm = T) #this sum hasn't changed
# By ENCOUTER --------------------------------------------------------------------------------------------
#### Second, for each interaction, determine if the two groups moved together, and if so, did they comove together
#enc_df <- readRDS("baboons_dyadDurat_dayonly.rds") #from script 7
#rownames(enc_df) <- NULL
if(SUBSET){enc_df <- read.csv("enc_df_subset.csv")} else {enc_df <- read.csv("enc_df.csv")}
enc_df$start_local_timestamp <- as.POSIXct(enc_df$start_local_timestamp, tz='UTC')
enc_df$end_local_timestamp <- as.POSIXct(enc_df$end_local_timestamp, tz='UTC')
#prep
spec_df_day$dy_name <- apply( spec_df_day, 1, FUN = dy_name)
enc_df <- enc_df[order(enc_df$dyadID, enc_df$start_local_timestamp ),]
enc_df$enc_number <- 1:nrow(enc_df)
enc_df$move_together_group1 <- 0
enc_df$move_together_group2 <- 0
enc_df$comove <- 0
#need comove_df
comove_df <- data.frame(cbind(dy_name = rep(NA, 200),
enc_number = rep(NA, 200),
start_local_timestamp = rep(0, 200),
end_local_timestamp = rep(0, 200),
move_comove_group_1 = rep(NA, 200),
move_comove_group_2 = rep(NA, 200),
dur_comove = rep(NA, 200)))
c <- 1 #start a comove_df row counter
for (i in 1:nrow(enc_df)) {
print(i)
#subset to just get gps data from within the time of the interaction
spec_df_sub <- droplevels(subset(spec_df_day, spec_df_day$dy_name == enc_df$dyadID[i] &
spec_df_day$local_timestamp >= enc_df$start_local_timestamp[i] &
spec_df_day$local_timestamp <= enc_df$end_local_timestamp[i]))
## DIST MOVED TOGETHER ##
# where there is more than one indiv in the group, sort out which one to use
#make df for group 1
group1_df <- spec_df_sub[ spec_df_sub$group1 == enc_df$group1[ i ], ]
#figure out which individual (if there is >1 has the most data)
id_to_use <- names( table( group1_df$id1 ) )[ which.max( table( group1_df$id1 ) ) ]
#use that individual's data
id1_df <- group1_df[ group1_df$id1 == id_to_use, ]
#sort it by intergroup distance (#in case other group also has >1 collared indiv, the next two lines take out the farther away indiv)
id1_df <- id1_df[ order( id1_df$dyadDist ), ]
#remove duplicates
id1_df_no_dup <- id1_df[ !duplicated( id1_df$local_timestamp ), ]
#sort it by time
id1_df_no_dup <- id1_df_no_dup[ order( id1_df_no_dup$local_timestamp ), ]
#do all the same for the other groups
group2_df <- spec_df_sub[ spec_df_sub$group1 == enc_df$group2[ i ], ]
id_to_use <- names( table( group2_df$id1 ) )[ which.max( table( group2_df$id1 ) ) ]
id2_df <- group2_df[ group2_df$id1 == id_to_use, ]
id2_df <- id2_df[ order( id2_df$dyadDist ), ]
id2_df_no_dup <- id2_df[ !duplicated( id2_df$local_timestamp ), ]
id2_df_no_dup <- id2_df_no_dup[ order( id2_df_no_dup$local_timestamp ), ]
#calculate distances moved together, dump into enc_df
# greater than for the first part because the step taken to get into an encounter does not count towards the distance traveled together while in an encounter
time_sub_1 <- id1_df_no_dup[ id1_df_no_dup$local_timestamp > enc_df$start_local_timestamp[ i ] & id1_df_no_dup$local_timestamp <= enc_df$end_local_timestamp[ i ], ]
enc_df$move_together_group1[ i ] <- sum( time_sub_1$spat.disc.dist ) ##so moved distance is SUM OF EACH STEP, not displacement
# greater than for the first part because the step taken to get into an encounter does not count towards the distance traveled together while in an encounter
time_sub_2 <- id2_df_no_dup[ id2_df_no_dup$local_timestamp > enc_df$start_local_timestamp[ i ] & id2_df_no_dup$local_timestamp <= enc_df$end_local_timestamp[ i ], ]
enc_df$move_together_group2[ i ] <- sum( time_sub_2$spat.disc.dist )
rm(time_sub_1, time_sub_2)
## COMOVEMENT ##
spec_df_sub_min <- aggregate( spec_df_sub$dyadDist, by = list( spec_df_sub$local_timestamp, spec_df_sub$dy_name ), FUN = min, na.rm = T )
names( spec_df_sub_min ) <- c( 'local_timestamp', 'dy_name', 'dyadDist' )
spec_df_sub_min <- spec_df_sub_min[ order( spec_df_sub_min$local_timestamp ), ]
#all times when within the comovement threshold
comoving_sub <- spec_df_sub_min[ spec_df_sub_min$dyadDist < group_spread_dist_day, ] ######################### use group_spread_dist OR group_spread_dist_day????
if( nrow( comoving_sub ) != 0 ){ # if the dyad comes within this distance of each other... then determine if cohesive movement occured
print(paste(i, "in maybe comove if"))
# find the time differences between observations of potential encounters
diff_min <- as.numeric( diff(comoving_sub$local_timestamp), units = 'mins' )
# find out how many of these are consecutive GPS fixes
tsig <- c(F,(abs(diff_min) == samp_int))
# find the indices that represent the start of a potential encounter (i.e. more than the sampling interval after the previous time the dyad was in close proximity)
startIndex <- which(tsig == F)
# find the indices associated with the ends of potential encounters (when the next time of close proximity is more than the sampling interval after an observation of proximity)
endIndex <- c((startIndex[-1] - 1), nrow(comoving_sub))
start_times <- comoving_sub$local_timestamp[startIndex]
end_times <- comoving_sub$local_timestamp[endIndex]
dur_interact <- as.numeric( end_times - start_times, units = 'mins')
num_of_possible_comoves <- which( dur_interact >= min_comove_duration )
for (j in seq_along(num_of_possible_comoves)) {
print(paste(i, ", ", j))
# greater than for the first part because the step taken to get into an encounter does not count towards the distance traveled together while in an encounter
time_sub_1 <- id1_df_no_dup[ id1_df_no_dup$local_timestamp > start_times[ j ] & id1_df_no_dup$local_timestamp <= end_times[ j ], ]
# greater than for the first part because the step taken to get into an encounter does not count towards the distance traveled together while in an encounter
time_sub_2 <- id2_df_no_dup[ id2_df_no_dup$local_timestamp > start_times[ j ] & id2_df_no_dup$local_timestamp <= end_times[ j ], ]
if( sum( time_sub_1$spat.disc.dist, na.rm = T ) > min_move & sum( time_sub_2$spat.disc.dist, na.rm = T ) > min_move ){ # if both groups exceed distance threshold
print(paste(i, "yes to comove"))
enc_df$comove[i] <- enc_df$comove[i] + 1 #count up the number of comove bouts for that encounter
comove_df$dy_name[c] <- enc_df$dyadID[i]
comove_df$enc_number[c] <- enc_df$enc_number[i]
#now save the relevant info about the comovement bout
#start & end time
print(start_times[j])
print(end_times[j])
comove_df$start_local_timestamp[c] <- start_times[ j ]
comove_df$end_local_timestamp[c] <- end_times[ j ]
comove_df$dur_comove[c] <- as.numeric( end_times[ j ] - start_times[ j ], units = "mins" )
#distances travelled
comove_df$move_comove_group_1[c] <- sum( time_sub_1$spat.disc.dist )
comove_df$move_comove_group_2[c] <- sum( time_sub_2$spat.disc.dist )
#count up the comove row index
c <- c + 1
} # close if comove >= minmove statement
rm(time_sub_1, time_sub_2)
} #close j loop
} # close if comove = true statement
} # close i loop
comove_df <- droplevels(subset(comove_df, is.na(comove_df$dur_comove) != TRUE))
sum(enc_df$comove) == nrow(comove_df) ##SHOULD BE TRUE!!!
comove_df$start_local_timestamp <- as.POSIXct(comove_df$start_local_timestamp, origin = '1970-01-01', tz = "UTC")
comove_df$end_local_timestamp <- as.POSIXct(comove_df$end_local_timestamp, origin = '1970-01-01', tz = "UTC")
comove_df$date <- as.Date( comove_df$start_local_timestamp )
enc_df$date <- as.Date( enc_df$start_local_timestamp )
enc_df <- left_join(enc_df, daily_df)
comove_df <- left_join(comove_df, daily_df)
# Exploring Output ---------------------------------------------------------------------------------
# > COHESIVE MOVT ------------------------------------------------------------------------------------
### Get basic summary stats for cohesive movement
comove_by_day <- comove_df %>%
group_by(dy_name, date) %>%
summarize(total_move_comove_group_1 = sum(move_comove_group_1),
total_move_comove_group_2 = sum(move_comove_group_2),
total_dur_comove = sum(dur_comove),
mean_total_comove = mean(total_move_comove_group_1, total_move_comove_group_2),
cosleep_last_night = mean(cosleep_last_night))
#on how many days was there comovement
nrow(comove_by_day)
#on how many days did comove happen after cosleep
nrow(subset(comove_by_day, cosleep_last_night == 1))
nrow(subset(comove_by_day, cosleep_last_night == 0))
#distance moved cohesively on days after sharing a sleep site
mean(comove_by_day$mean_total_comove[which(comove_by_day$cosleep_last_night == 1)])
#distance moved cohesively on days after NOT sharing a sleep site
mean(comove_by_day$mean_total_comove[which(comove_by_day$cosleep_last_night == 0)])
#duration of comovement on days after sharing a sleep site
mean(comove_by_day$total_dur_comove[which(comove_by_day$cosleep_last_night == 1)])
sd(comove_by_day$total_dur_comove[which(comove_by_day$cosleep_last_night == 1)])
#duration of comovement on days after NOT sharing a sleep site
mean(comove_by_day$total_dur_comove[which(comove_by_day$cosleep_last_night == 0)])
sd(comove_by_day$total_dur_comove[which(comove_by_day$cosleep_last_night == 0)])
saveRDS(comove_by_day, "RESULTS/Cohesive_movement_summary_byday.rds")
# > ALL ENCS ------------------------------------------------------------------------------------------
### Get basic summary stats for all encounters
enc_by_day <- enc_df %>%
group_by(dy_name, date) %>%
summarize(total_move_group_1 = sum(move_together_group1),
total_move_group_2 = sum(move_together_group2),
total_dur_enc = sum(duration),
mean_total_move = mean(c(move_together_group1, move_together_group2)),
cosleep_last_night = mean(cosleep_last_night))
#on how many days was there comovement
nrow(comove_by_day)
#number of encounters after cosleep
sum(enc_df$cosleep_last_night == 1, na.rm = T)
sum(enc_df$cosleep_last_night == 0, na.rm = T)
# > PLOTS ---------------------------------------------------------------------------------------------
ggplot() +
geom_boxplot(data = comove_by_day,
mapping = aes(x = as.factor(cosleep_last_night),
y = total_dur_comove)) +
geom_point(data = comove_by_day,
mapping = aes(x = as.factor(cosleep_last_night),
y = total_dur_comove),
position = position_jitter(width = 0.1),
size = 4,
alpha = 0.5) +
scale_y_continuous(limits = c(0, 300))
ggplot() +
geom_boxplot(data = comove_by_day,
mapping = aes(x = as.factor(cosleep_last_night),
y = mean_total_comove)) +
geom_point(data = comove_by_day,
mapping = aes(x = as.factor(cosleep_last_night),
y = mean_total_comove),
position = position_jitter(width = 0.1),
size = 4,
alpha = 0.5)
# ?SUPP FIGURE --------------------------------------------------------------------------------------
#### SUPPLEMENTARY FIGURE of ALL ENCOUNTERS plotted by TIME OF DAY
enc_df$start_time <- as_hms(enc_df$start_local_timestamp)
enc_df$end_time <- as_hms(enc_df$end_local_timestamp)
comove_df$start_time <- as_hms(comove_df$start_local_timestamp)
comove_df$end_time <- as_hms(comove_df$end_local_timestamp)
ggplot() +
geom_segment(data = enc_df,
mapping = aes(x = start_time,
xend = end_time,
y = paste(dyadID, date),
yend = paste(dyadID, date),
color = dyadID),
linewidth = 2,
lineend = "round") +
geom_point(data = subset(enc_df, cosleep_last_night == 1),
mapping = aes(x = as_hms("08:25:00"),
y = paste(dyadID, date)),
shape = 8) +
geom_point(data = subset(enc_df, cosleep_tonight == 1),
mapping = aes(x = as_hms("17:35:00"),
y = paste(dyadID, date)),
shape = 8) +
geom_segment(data = comove_df,
mapping = aes(x = start_time,
xend = end_time,
y = paste(dy_name, date),
yend = paste(dy_name, date)),
color = "white",
alpha = 0.9,
linewidth = 1.5,
lineend = "round") +
geom_text(data = enc_df %>% group_by(dyadID) %>% slice(n()),
mapping = aes(x = as_hms("08:20:00"),
y = paste(dyadID, date),
label = dyadID,
color = dyadID),
hjust = 1,
vjust = 1,
fontface = "bold") +
scale_color_manual(values = c( "#fd6100" , "#dc267f", "#785ef0", "#feb000","#648fff")) +
scale_y_discrete(name = "Dyad - Day",
expand = c(0.02, 0)) +
scale_x_time(name = "Time of day (08:30 to 17:30)",
breaks = c(as_hms(c("08:30:00", "10:00:00", "12:00:00", "14:00:00", "16:00:00", "17:30:00"))),
limits = c(as_hms(c("08:10:00", "17:37:00")))) +
theme_bw()+
theme(axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
panel.border = element_blank(),
legend.position = "none")