A Appendix I

A.1 Initial data preparation script

# Bootstrap pacman, then let p_load() install (when missing) and attach every
# package this script relies on. WDI is listed because WDIsearch() and WDI()
# are called further down; without it those calls fail in a fresh session.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(knitr, pryr, openxlsx, tidyverse, 
               data.table, DT, DescTools, RCurl, countrycode, WDI)
# NOTE(review): warn = -1 silences every warning for the whole session,
# which also hides any warnings raised while matching country names below.
options(warn = -1, digits = 4, scipen = 999)
#------------------------------------------------------------
# External data: country-level geocodes used to fill missing lat/lons
#------------------------------------------------------------
# The remote CSV is read with default column names V1..V3
# (country name, latitude, longitude). Country names are converted to ISO3
# codes and rows whose name cannot be converted are dropped.
geocodes <- fread("https://github.com/oughton/geocode/raw/master/example/result.csv") %>%
  transmute(ISO = countrycode(V1, "country.name", "iso3c"),
            country_latitude = V2,
            country_longitude = V3) %>%
  filter(!is.na(ISO))

# Persist the lookup to disk, then load it back under the name used by the
# GTD preparation step.
saveRDS(geocodes, file = "country_geocodes.rds")
country_geocodes <- readRDS(file = "country_geocodes.rds")

#---------------------------------------
# Data preparation (GTD)
#---------------------------------------
# Reads the raw GTD workbook, keeps and renames the variables of interest,
# fills missing text fields, builds an approximate event date, and fills
# missing coordinates from country-level geocodes.

# Full GTD label of the vehicle weapon category. It is assembled with paste0()
# so the string contains no line break or indentation; a literal wrapped over
# two source lines would embed them and never equal the value in the data.
vehicle_weapon_label <- paste0(
  "Vehicle (not to include vehicle-borne explosives, ",
  "i.e., car or truck bombs)"
)

# Country-level coordinates for the remaining (~14) disputed/dissolved
# countries that are still missing lat/lons after the ISO-based fill
# (country level lat long taken from previous observations).
fallback_geocodes <- tribble(
  ~country,                           ~fallback_latitude, ~fallback_longitude,
  "People's Republic of the Congo",    -0.2,   15.8,
  "Democratic Republic of the Congo",  -4.0,   21.7,
  "North Yemen",                       15.5,   48.5,
  "South Yemen",                       12.8,   45.0,
  "Western Sahara",                    27.4,   -9.0,
  "Guadeloupe",                        16.2,  -61.5,
  "New Caledonia",                    -20.9,  165.6,
  "Martinique",                        14.6,  -61.0,
  "Zaire",                             -2.5,   28.8,
  "Kosovo",                            43.1,   20.7,
  "Czechoslovakia",                    50.6,   14.0,
  "Yugoslavia",                        42.5,   20.5
)

tmp <- read.xlsx("data/data_preparation/globalterrorismdb_0617dist.xlsx",
                 sheet = 1, colNames = TRUE) %>%
  select(eventid,
         year = iyear,
         month = imonth,
         day = iday,
         country = country_txt,
         region = region_txt,
         provstate,
         city,
         latitude, # 2.7% NAs will be replaced with country level geocodes
         longitude,
         attack_type = attacktype1_txt,
         weapon_type = weaptype1_txt,
         target_type = targtype1_txt,
         target_nalty = natlty1_txt,
         group_name = gname,
         nkill,   # 5% NAs
         nwound,  # 9% NAs
         extended,
         crit1_pol_eco_rel_soc = crit1,
         crit2_publicize = crit2,
         crit3_os_intl_hmn_law = crit3,
         part_of_multiple_attacks = multiple,
         attack_success = success,
         suicide_attack = suicide,
         individual_attack = individual,
         intl_logistical_attack = INT_LOG,
         intl_ideological_attack = INT_IDEO
         ) %>%
  # replace NAs in free-text location/nationality fields with "unknown"
  replace_na(list(provstate = "unknown",
                  city = "unknown",
                  target_nalty = "unknown")) %>%
  mutate(
    # standardize country name to an ISO3 code
    ISO = countrycode(country, "country.name", "iso3c"),
    # unknown month (coded 0) is set to 1 (20 occurrences)
    month = if_else(month == 0, 1, month),
    # unknown day (coded 0) is set to 1 (891 occurrences)
    day = if_else(day == 0, 1, day),
    date = as.Date(paste(year, month, day, sep = "-"), format = "%Y-%m-%d"),
    # shorten the lengthy vehicle category name
    weapon_type = if_else(weapon_type == vehicle_weapon_label,
                          "Vehicle", weapon_type)
  ) %>%
  # ISO is the only column shared with country_geocodes; name it explicitly
  left_join(country_geocodes, by = "ISO") %>%
  # replace missing lat/lons with country-level lat/lons
  mutate(latitude = coalesce(latitude, country_latitude),
         longitude = coalesce(longitude, country_longitude)) %>%
  select(-c(country_latitude, country_longitude)) %>%
  # second pass: countries listed in fallback_geocodes; latitude and
  # longitude are filled independently, and only where still missing
  left_join(fallback_geocodes, by = "country") %>%
  mutate(latitude = coalesce(latitude, fallback_latitude),
         longitude = coalesce(longitude, fallback_longitude)) %>%
  select(-c(fallback_latitude, fallback_longitude))

# The later steps of this script (WDI merge, world-map file) read and extend
# `df`, so expose the prepared data under that name; `tmp` is kept as well.
df <- tmp

#--------------------------------------------------------------
# External data (World Development Indicators from World Bank API)
#--------------------------------------------------------------
# Exploratory lookup: enter a search text and read the indicator code
# off the result.
WDIsearch(string = "conflict")

# World Bank indicator codes to download, keyed by the column name used for
# each indicator in the rest of the script.
ind <- c(
  # Arms exports / imports (SIPRI trend indicator values)
  arms_export     = "MS.MIL.XPRT.KD",
  arms_import     = "MS.MIL.MPRT.KD",
  # Population, total
  population      = "SP.POP.TOTL",
  # GDP per capita (constant 2010 US$)
  gdp_per_capita  = "NY.GDP.PCAP.KD",
  # Refugee population by country of origin / by country of asylum
  refugee_origin  = "SM.POP.REFG.OR",
  refugee_asylum  = "SM.POP.REFG",
  # Net migration
  net_migration   = "SM.POP.NETM",
  # Presence of peace keepers
  n_peace_keepers = "VC.PKP.TOTL.UN",
  # conflict index (0-10)
  conflict_index  = "IC.PI.CIR"
)

# ISO3 codes of the countries present in the GTD data; used to restrict the
# WDI download to those countries.
countries_vec <- as.vector(unique(df$ISO)) # countries in gtd dataset

# Sentinel values that stand in for missing indicator values
# (for visualization and modelling purposes).
wdi_na_fill <- list(arms_export = 0,
                    arms_import = 0,
                    population = -1,
                    gdp_per_capita = 0,
                    refugee_origin = 0,
                    refugee_asylum = 0,
                    net_migration = 0,
                    n_peace_keepers = 0,
                    conflict_index = -1)

# Download the indicators for 1970-2016, keep one row per year and ISO3 code,
# and drop rows without an ISO3 code or outside the GTD country list.
wdi_data <- WDI(indicator = ind, start = 1970, end = 2016, extra = TRUE) %>%
  select(year, ISO = iso3c, arms_export, arms_import, population,
         gdp_per_capita, refugee_origin, refugee_asylum, net_migration,
         n_peace_keepers, conflict_index) %>%
  drop_na(ISO) %>%
  filter(ISO %in% countries_vec) %>%
  replace_na(wdi_na_fill)

# year and ISO are the only columns the two tables share; naming them keeps
# the join key stable if either table later gains a column.
# The fill is applied again after the join: GTD rows with no matching WDI row
# (e.g. a missing ISO code) would otherwise keep NA in every indicator column
# despite the replacement above.
df <- df %>%
  left_join(wdi_data, by = c("year", "ISO")) %>%
  replace_na(wdi_na_fill)
saveRDS(df, "gtd_clean_v2.rds")

# move all data to: gtd_eda/index/data  path for shiny and thesis writing
# "df" is the main file used throughout this research

#---------------------------------------
# ISO3 lookup file for the world map
#---------------------------------------
# One row per GTD country: number of incidents plus the ISO3 code derived
# from the country name.
countries <- df %>%
  group_by(country) %>%
  summarise(total = round(n())) %>%
  mutate(iso3 = countrycode(country,
                            origin = "country.name",
                            destination = "iso3c"))
saveRDS(countries, file = "countries.rds")

A.2 List of variables and short description

Table A.1: Short description of important variables
Name of the Variable description
eventid a 12-digit Event ID
year year in which the incident occurred
month month
day day
country country
region world region
provstate an administrative division or unit of a country
city city
latitude latitude
longitude longitude
attack_type method of attack (reflects the broad class of tactics used)
weapon_type type of weapon used in the incident
target_type type of target/victim
target_nalty nationality of the target that was attacked
group_name name of the group that carried out the attack
nkill number of total confirmed fatalities for the incident
nwound number of confirmed non-fatal injuries
extended whether or not an incident extended more than 24 hours
crit1_pol_eco_rel_soc political, economic, religious, or social goal
crit2_publicize intention to coerce, or publicize to larger audience
crit3_os_intl_hmn_law action from the incident is outside intl humanitarian law
part_of_multiple_attacks whether the incident was part of multiple attacks
attack_success whether an incident was successful
suicide_attack suicide attack
individual_attack whether the attack was carried out by unaffiliated individual(s)
intl_logistical_attack cross border incident
intl_ideological_attack attack on target of a different nationality
ISO ISO code for country
date Approx. date of incident
arms_export Arms exports (SIPRI trend indicator values)
arms_import Arms imports (SIPRI trend indicator values)
population Population, total
gdp_per_capita GDP per capita (constant 2010 US$)
refugee_origin Refugee population by country or territory of origin
refugee_asylum Refugee population by country or territory of asylum
net_migration Net migration
n_peace_keepers Presence of peace keepers
conflict_index Extent of conflict of interest regulation index (0-10)

A.3 R Session Info:

sessionInfo()
R version 3.5.0 (2018-04-23)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17134)

Matrix products: default

locale:
[1] LC_COLLATE=English_United Kingdom.1252 
[2] LC_CTYPE=English_United Kingdom.1252   
[3] LC_MONETARY=English_United Kingdom.1252
[4] LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.1252    

attached base packages:
[1] parallel  grid      stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] bindrcpp_0.2.2             ggthemes_3.5.0            
 [3] servr_0.10                 lightgbm_2.1.2            
 [5] R6_2.2.2                   pROC_1.12.1               
 [7] caret_6.0-80               lattice_0.20-35           
 [9] eply_0.1.2                 maps_3.3.0                
[11] maptools_0.9-2             sp_1.3-1                  
[13] ggmap_2.6.1                shiny_1.1.0               
[15] treemapify_2.5.0           WDI_2.5                   
[17] RJSONIO_1.3-0              imputeTS_2.7              
[19] tseries_0.10-45            forecast_8.4              
[21] tidyquant_0.5.5            forcats_0.3.0             
[23] purrr_0.2.5                readr_1.1.1               
[25] tidyr_0.8.1                tibble_1.4.2              
[27] tidyverse_1.2.1            quantmod_0.4-13           
[29] TTR_0.23-3                 PerformanceAnalytics_1.5.2
[31] xts_0.10-2                 zoo_1.8-2                 
[33] timetk_0.1.1               TSstudio_0.1.1.9000       
[35] igraph_1.2.1               visNetwork_2.0.4          
[37] arulesViz_1.3-1            arules_1.6-1              
[39] Matrix_1.2-14              d3heatmap_0.6.1.2         
[41] treemap_2.4-2              highcharter_0.6.0         
[43] plotly_4.7.1.9000          ggfortify_0.4.5           
[45] RColorBrewer_1.1-2         viridis_0.5.1             
[47] viridisLite_0.3.0          leaflet.extras_1.0.0      
[49] leaflet_2.0.1              countrycode_1.00.0        
[51] lubridate_1.7.4            scales_0.5.0              
[53] StandardizeText_1.0        GGally_1.4.0              
[55] DescTools_0.99.24          R.utils_2.6.0             
[57] R.oo_1.22.0                R.methodsS3_1.7.1         
[59] kableExtra_0.9.0           tictoc_1.0                
[61] pryr_0.1.4                 reshape_0.8.7             
[63] stringi_1.1.7              stringr_1.3.1             
[65] RCurl_1.95-4.10            bitops_1.0-6              
[67] openxlsx_4.1.0             DT_0.4.15                 
[69] data.table_1.11.4          pacman_0.4.6              
[71] thesisdown_0.0.2           knitr_1.20                
[73] bookdown_0.7.13            ggplot2_3.0.0.9000        
[75] dplyr_0.7.5                devtools_1.13.5           

loaded via a namespace (and not attached):
  [1] prabclus_2.2-6       ModelMetrics_1.1.0   rpart_4.1-13        
  [4] ggfittext_0.6.0      rlist_0.4.6.1        xml2_1.2.0          
  [7] httpuv_1.4.4.1       assertthat_0.2.0     gower_0.1.2         
 [10] xfun_0.2.9           hms_0.4.2            evaluate_0.10.1     
 [13] promises_1.0.1       TSP_1.1-6            DEoptimR_1.0-8      
 [16] caTools_1.17.1       dendextend_1.8.0     readxl_1.1.0        
 [19] htmlwidgets_1.2.1    Quandl_2.8.0         ddalpha_1.3.4       
 [22] stats4_3.5.0         crosstalk_1.0.0      colormap_0.1.4      
 [25] backports_1.1.2      V8_1.5               trimcluster_0.1-2   
 [28] gridBase_0.4-7       geosphere_1.5-7      abind_1.4-5         
 [31] withr_2.1.2          sfsmisc_1.1-2        robustbase_0.93-1   
 [34] vcd_1.4-4            gclus_1.3.1          mclust_5.4          
 [37] mnormt_1.5-5         cluster_2.0.7-1      lazyeval_0.2.1      
 [40] urca_1.3-0           crayon_1.3.4         labeling_0.3        
 [43] recipes_0.1.3        pkgconfig_2.0.1      nlme_3.1-137        
 [46] seriation_1.2-3      nnet_7.3-12          bindr_0.1.1         
 [49] rlang_0.2.1          diptest_0.75-7       pls_2.6-0           
 [52] stinepack_1.3        registry_0.5         modelr_0.1.2        
 [55] cellranger_1.1.0     rprojroot_1.3-2      lmtest_0.9-36       
 [58] boot_1.3-20          base64enc_0.1-3      whisker_0.3-2       
 [61] png_0.1-7            rjson_0.2.20         KernSmooth_2.23-15  
 [64] DRR_0.0.3            jpeg_0.1-8           memoise_1.1.0       
 [67] magrittr_1.5         plyr_1.8.4           gplots_3.0.1        
 [70] gdata_2.18.0         compiler_3.5.0       dimRed_0.1.0        
 [73] cli_1.0.0            magic_1.5-8          MASS_7.3-49         
 [76] tidyselect_0.2.4     highr_0.7            yaml_2.1.19         
 [79] manipulate_1.0.1     tools_3.5.0          RgoogleMaps_1.4.2   
 [82] rstudioapi_0.7       foreach_1.4.4        foreign_0.8-70      
 [85] gridExtra_2.3        prodlim_2018.04.18   scatterplot3d_0.3-41
 [88] digest_0.6.15        lava_1.6.1           proto_1.0.0         
 [91] quadprog_1.5-5       fpc_2.1-11           Rcpp_0.12.17        
 [94] broom_0.4.4          later_0.7.3          httr_1.3.1          
 [97] psych_1.8.4          kernlab_0.9-26       colorspace_1.3-2    
[100] rvest_0.3.2          CVST_0.2-2           splines_3.5.0       
[103] RcppRoll_0.3.0       expm_0.999-2         mapproj_1.2.6       
[106] flexmix_2.3-14       xtable_1.8-2         jsonlite_1.5        
[109] geometry_0.3-6       timeDate_3043.102    modeltools_0.2-21   
[112] ipred_0.9-6          pillar_1.2.3         htmltools_0.3.6     
[115] mime_0.5             glue_1.2.0           class_7.3-14        
[118] codetools_0.2-15     mvtnorm_1.0-8        curl_3.2            
[121] gtools_3.8.1         zip_1.0.0            survival_2.41-3     
[124] rmarkdown_1.10       munsell_0.5.0        e1071_1.6-8         
[127] uroot_2.0-9          iterators_1.0.9      haven_1.1.1         
[130] fracdiff_1.4-2       reshape2_1.4.3       gtable_0.2.0