A Appendix I
A.1 Initial data preparation script
if (!require("pacman")) install.packages("pacman")
pacman::p_load(knitr, pryr, openxlsx, tidyverse,
               data.table, DT, DescTools, RCurl, countrycode, WDI)
# Session-wide options: suppress warnings, print 4 significant digits and
# avoid scientific notation.
options(warn = -1, digits = 4, scipen = 999)

#------------------------------------------------------------
# External data (country geocodes to replace missing lat lons)
#------------------------------------------------------------
# Download the country-level geocode table, translate each country name to
# its ISO3 code and keep only the rows for which a code could be resolved.
geocodes <- fread(
  "https://github.com/oughton/geocode/raw/master/example/result.csv"
) %>%
  transmute(
    ISO = countrycode(V1, "country.name", "iso3c"),
    country_latitude = V2,
    country_longitude = V3
  ) %>%
  filter(!is.na(ISO))

# Cache the lookup on disk and load the cached copy under the name used by
# the rest of the script.
saveRDS(geocodes, "country_geocodes.rds")
country_geocodes <- readRDS("country_geocodes.rds")
#---------------------------------------
#data preparation (GTD)
#---------------------------------------
# Full GTD label of the non-explosive vehicle weapon type. It is assembled
# with paste0() so that the literal contains no embedded line break; a line
# break inside the string would prevent it from ever matching the data.
vehicle_label_long <- paste0(
  "Vehicle (not to include vehicle-borne explosives, ",
  "i.e., car or truck bombs)"
)

# Country-level coordinates for the (~14) disputed/dissolved countries whose
# names do not resolve through the geocode lookup. They are applied only to
# rows whose latitude/longitude is still missing after the geocode join.
fallback_coords <- tribble(
  ~country,                           ~fallback_latitude, ~fallback_longitude,
  "People's Republic of the Congo",    -0.2,               15.8,
  "Democratic Republic of the Congo",  -4.0,               21.7,
  "North Yemen",                       15.5,               48.5,
  "South Yemen",                       12.8,               45.0,
  "Western Sahara",                    27.4,               -9.0,
  "Guadeloupe",                        16.2,              -61.5,
  "New Caledonia",                    -20.9,              165.6,
  "Martinique",                        14.6,              -61.0,
  "Zaire",                             -2.5,               28.8,
  "Kosovo",                            43.1,               20.7,
  "Czechoslovakia",                    50.6,               14.0,
  "Yugoslavia",                        42.5,               20.5
)

# Read the raw GTD workbook, keep and rename the variables used in this
# research, and clean them up step by step.
tmp <- read.xlsx("data/data_preparation/globalterrorismdb_0617dist.xlsx",
                 sheet = 1, colNames = TRUE) %>%
  select(
    eventid,
    year = iyear,
    month = imonth,
    day = iday,
    country = country_txt,
    region = region_txt,
    provstate,
    city,
    latitude,  # 2.7% NAs will be replaced with country level geocodes
    longitude,
    attack_type = attacktype1_txt,
    weapon_type = weaptype1_txt,
    target_type = targtype1_txt,
    target_nalty = natlty1_txt,
    group_name = gname,
    nkill,   # 5% NAs
    nwound,  # 9% NAs
    extended,
    crit1_pol_eco_rel_soc = crit1,
    crit2_publicize = crit2,
    crit3_os_intl_hmn_law = crit3,
    part_of_multiple_attacks = multiple,
    attack_success = success,
    suicide_attack = suicide,
    individual_attack = individual,
    intl_logistical_attack = INT_LOG,
    intl_ideological_attack = INT_IDEO
  ) %>%
  # Replace missing text fields with "unknown".
  replace_na(list(provstate = "unknown",
                  city = "unknown",
                  target_nalty = "unknown")) %>%
  mutate(
    # Standardize the country name to its ISO3 code.
    ISO = countrycode(country, "country.name", "iso3c"),
    # Unknown month (0) is set to 1 (20 occurrences).
    month = if_else(month == 0, 1, month),
    # Unknown day (0) is set to 1 (891 occurrences).
    day = if_else(day == 0, 1, day),
    date = as.Date(paste(year, month, day, sep = "-"), format = "%Y-%m-%d"),
    # Shorten the lengthy weapon type name.
    weapon_type = if_else(weapon_type == vehicle_label_long,
                          "Vehicle", weapon_type)
  ) %>%
  # NOTE(review): assumes country_geocodes has one row per ISO code; duplicate
  # codes would duplicate events here -- confirm against the geocode file.
  left_join(country_geocodes, by = "ISO") %>%
  # Replace missing lat lons with country level lat lons. as.numeric() keeps
  # the fill working even if the geocode columns were read as integer.
  mutate(
    latitude = coalesce(latitude, as.numeric(country_latitude)),
    longitude = coalesce(longitude, as.numeric(country_longitude))
  ) %>%
  select(-c(country_latitude, country_longitude)) %>%
  # Replace the remaining missing lat lons with the fallback coordinates.
  left_join(fallback_coords, by = "country") %>%
  mutate(
    latitude = coalesce(latitude, fallback_latitude),
    longitude = coalesce(longitude, fallback_longitude)
  ) %>%
  select(-c(fallback_latitude, fallback_longitude))

# The rest of the script refers to the prepared GTD data as "df".
df <- tmp
#--------------------------------------------------------------
#External data (World Development Indicators from worldbank api)
#--------------------------------------------------------------
# Interactive helper: enter search text and read the indicator code off the
# printed result.
WDIsearch("conflict")

# Indicator codes requested from the World Bank API; the vector names become
# the column names of the downloaded data.
ind <- c(
  arms_export = "MS.MIL.XPRT.KD",     # Arms exports (SIPRI trend indicator values)
  arms_import = "MS.MIL.MPRT.KD",     # Arms imports (SIPRI trend indicator values)
  population = "SP.POP.TOTL",         # Population, total
  gdp_per_capita = "NY.GDP.PCAP.KD",  # GDP per capita (constant 2010 US$)
  refugee_origin = "SM.POP.REFG.OR",  # Refugee population by country of origin
  refugee_asylum = "SM.POP.REFG",     # Refugee population by country of asylum
  net_migration = "SM.POP.NETM",      # Net migration
  n_peace_keepers = "VC.PKP.TOTL.UN", # Presence of peace keepers
  conflict_index = "IC.PI.CIR"        # Extent of conflict of interest
                                      # regulation index (0-10)
)

# Countries present in the GTD dataset.
countries_vec <- as.vector(unique(df$ISO))

wdi_data <- WDI(indicator = ind, start = 1970, end = 2016, extra = TRUE) %>%
  select(year, ISO = iso3c, arms_export, arms_import, population,
         gdp_per_capita, refugee_origin, refugee_asylum, net_migration,
         n_peace_keepers, conflict_index) %>%
  # Drop rows without an ISO code before matching against the GTD countries.
  drop_na(ISO) %>%
  filter(ISO %in% countries_vec) %>%
  # Replace NAs for visualization and modelling purposes; -1 is used as the
  # missing-value marker for population and conflict_index.
  replace_na(list(arms_export = 0,
                  arms_import = 0,
                  population = -1,
                  gdp_per_capita = 0,
                  refugee_origin = 0,
                  refugee_asylum = 0,
                  net_migration = 0,
                  n_peace_keepers = 0,
                  conflict_index = -1))

# Attach the indicators to every event by year and country code. The keys are
# named explicitly so the join does not depend on whichever columns happen to
# share a name.
df <- df %>% left_join(wdi_data, by = c("year", "ISO"))
saveRDS(df, "gtd_clean_v2.rds")
# move all data to: gtd_eda/index/data path for shiny and thesis writing
# "df" is the main file used throughout this research
#---------------------------------------
# iso3c file for worldmap
#---------------------------------------
# Number of recorded events per country, together with the ISO3 code needed
# to draw the world map.
countries <- df %>%
  group_by(country) %>%
  summarise(total = round(n())) %>%
  mutate(
    iso3 = countrycode(country,
                       origin = "country.name", destination = "iso3c")
  )
saveRDS(countries, "countries.rds")
A.2 List of variables and short description
Name of the Variable | description |
---|---|
eventid | a 12-digit Event ID |
year | year in which the incident occurred |
month | month |
day | day |
country | country |
region | world region |
provstate | an administrative division or unit of a country |
city | city |
latitude | latitude |
longitude | longitude |
attack_type | method of attack (reflects the broad class of tactics used) |
weapon_type | type of weapon used in the incident |
target_type | type of target/victim |
target_nalty | nationality of the target that was attacked |
group_name | name of the group that carried out the attack |
nkill | number of total confirmed fatalities for the incident |
nwound | number of confirmed non-fatal injuries |
extended | whether or not an incident extended more than 24 hours |
crit1_pol_eco_rel_soc | political, economic, religious, or social goal |
crit2_publicize | intention to coerce, or publicize to larger audience |
crit3_os_intl_hmn_law | action from the incident is outside intl humanitarian law |
part_of_multiple_attacks | whether the incident was part of multiple attacks |
attack_success | whether an incident was successful |
suicide_attack | suicide attack |
individual_attack | whether the attack was carried out by unaffiliated individual(s) |
intl_logistical_attack | cross border incident |
intl_ideological_attack | attack on target of a different nationality |
ISO | ISO code for country |
date | Approx. date of incident |
arms_export | Arms exports (SIPRI trend indicator values) |
arms_import | Arms imports (SIPRI trend indicator values) |
population | Population, total |
gdp_per_capita | GDP per capita (constant 2010 US$) |
refugee_origin | Refugee population by country or territory of origin |
refugee_asylum | Refugee population by country or territory of asylum |
net_migration | Net migration |
n_peace_keepers | Presence of peace keepers |
conflict_index | Extent of conflict of interest regulation index (0-10) |
A.3 R Session Info:
sessionInfo()
R version 3.5.0 (2018-04-23)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17134)
Matrix products: default
locale:
[1] LC_COLLATE=English_United Kingdom.1252
[2] LC_CTYPE=English_United Kingdom.1252
[3] LC_MONETARY=English_United Kingdom.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United Kingdom.1252
attached base packages:
[1] parallel grid stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] bindrcpp_0.2.2 ggthemes_3.5.0
[3] servr_0.10 lightgbm_2.1.2
[5] R6_2.2.2 pROC_1.12.1
[7] caret_6.0-80 lattice_0.20-35
[9] eply_0.1.2 maps_3.3.0
[11] maptools_0.9-2 sp_1.3-1
[13] ggmap_2.6.1 shiny_1.1.0
[15] treemapify_2.5.0 WDI_2.5
[17] RJSONIO_1.3-0 imputeTS_2.7
[19] tseries_0.10-45 forecast_8.4
[21] tidyquant_0.5.5 forcats_0.3.0
[23] purrr_0.2.5 readr_1.1.1
[25] tidyr_0.8.1 tibble_1.4.2
[27] tidyverse_1.2.1 quantmod_0.4-13
[29] TTR_0.23-3 PerformanceAnalytics_1.5.2
[31] xts_0.10-2 zoo_1.8-2
[33] timetk_0.1.1 TSstudio_0.1.1.9000
[35] igraph_1.2.1 visNetwork_2.0.4
[37] arulesViz_1.3-1 arules_1.6-1
[39] Matrix_1.2-14 d3heatmap_0.6.1.2
[41] treemap_2.4-2 highcharter_0.6.0
[43] plotly_4.7.1.9000 ggfortify_0.4.5
[45] RColorBrewer_1.1-2 viridis_0.5.1
[47] viridisLite_0.3.0 leaflet.extras_1.0.0
[49] leaflet_2.0.1 countrycode_1.00.0
[51] lubridate_1.7.4 scales_0.5.0
[53] StandardizeText_1.0 GGally_1.4.0
[55] DescTools_0.99.24 R.utils_2.6.0
[57] R.oo_1.22.0 R.methodsS3_1.7.1
[59] kableExtra_0.9.0 tictoc_1.0
[61] pryr_0.1.4 reshape_0.8.7
[63] stringi_1.1.7 stringr_1.3.1
[65] RCurl_1.95-4.10 bitops_1.0-6
[67] openxlsx_4.1.0 DT_0.4.15
[69] data.table_1.11.4 pacman_0.4.6
[71] thesisdown_0.0.2 knitr_1.20
[73] bookdown_0.7.13 ggplot2_3.0.0.9000
[75] dplyr_0.7.5 devtools_1.13.5
loaded via a namespace (and not attached):
[1] prabclus_2.2-6 ModelMetrics_1.1.0 rpart_4.1-13
[4] ggfittext_0.6.0 rlist_0.4.6.1 xml2_1.2.0
[7] httpuv_1.4.4.1 assertthat_0.2.0 gower_0.1.2
[10] xfun_0.2.9 hms_0.4.2 evaluate_0.10.1
[13] promises_1.0.1 TSP_1.1-6 DEoptimR_1.0-8
[16] caTools_1.17.1 dendextend_1.8.0 readxl_1.1.0
[19] htmlwidgets_1.2.1 Quandl_2.8.0 ddalpha_1.3.4
[22] stats4_3.5.0 crosstalk_1.0.0 colormap_0.1.4
[25] backports_1.1.2 V8_1.5 trimcluster_0.1-2
[28] gridBase_0.4-7 geosphere_1.5-7 abind_1.4-5
[31] withr_2.1.2 sfsmisc_1.1-2 robustbase_0.93-1
[34] vcd_1.4-4 gclus_1.3.1 mclust_5.4
[37] mnormt_1.5-5 cluster_2.0.7-1 lazyeval_0.2.1
[40] urca_1.3-0 crayon_1.3.4 labeling_0.3
[43] recipes_0.1.3 pkgconfig_2.0.1 nlme_3.1-137
[46] seriation_1.2-3 nnet_7.3-12 bindr_0.1.1
[49] rlang_0.2.1 diptest_0.75-7 pls_2.6-0
[52] stinepack_1.3 registry_0.5 modelr_0.1.2
[55] cellranger_1.1.0 rprojroot_1.3-2 lmtest_0.9-36
[58] boot_1.3-20 base64enc_0.1-3 whisker_0.3-2
[61] png_0.1-7 rjson_0.2.20 KernSmooth_2.23-15
[64] DRR_0.0.3 jpeg_0.1-8 memoise_1.1.0
[67] magrittr_1.5 plyr_1.8.4 gplots_3.0.1
[70] gdata_2.18.0 compiler_3.5.0 dimRed_0.1.0
[73] cli_1.0.0 magic_1.5-8 MASS_7.3-49
[76] tidyselect_0.2.4 highr_0.7 yaml_2.1.19
[79] manipulate_1.0.1 tools_3.5.0 RgoogleMaps_1.4.2
[82] rstudioapi_0.7 foreach_1.4.4 foreign_0.8-70
[85] gridExtra_2.3 prodlim_2018.04.18 scatterplot3d_0.3-41
[88] digest_0.6.15 lava_1.6.1 proto_1.0.0
[91] quadprog_1.5-5 fpc_2.1-11 Rcpp_0.12.17
[94] broom_0.4.4 later_0.7.3 httr_1.3.1
[97] psych_1.8.4 kernlab_0.9-26 colorspace_1.3-2
[100] rvest_0.3.2 CVST_0.2-2 splines_3.5.0
[103] RcppRoll_0.3.0 expm_0.999-2 mapproj_1.2.6
[106] flexmix_2.3-14 xtable_1.8-2 jsonlite_1.5
[109] geometry_0.3-6 timeDate_3043.102 modeltools_0.2-21
[112] ipred_0.9-6 pillar_1.2.3 htmltools_0.3.6
[115] mime_0.5 glue_1.2.0 class_7.3-14
[118] codetools_0.2-15 mvtnorm_1.0-8 curl_3.2
[121] gtools_3.8.1 zip_1.0.0 survival_2.41-3
[124] rmarkdown_1.10 munsell_0.5.0 e1071_1.6-8
[127] uroot_2.0-9 iterators_1.0.9 haven_1.1.1
[130] fracdiff_1.4-2 reshape2_1.4.3 gtable_0.2.0