2. variable importance and model summaries

Load the required libraries:

library(dismo, quietly=T)
library(phytools, quietly = T)
library(phylolm, quietly = T)
library(dplyr, quietly = T)
source("../script/Data.R")

Initialize global variables:

REPO_HOME <- paste(getwd(),'/../',sep='')

Load the list of taxa:

taxa.names <- scan(paste(REPO_HOME, "/data/filtered/taxa.txt", sep = ""), sep = "\n", what = character())

Define the list of GIS layers, i.e. the traits:

gis.layers <- raster::getData(
  "worldclim",
  var = "bio",
  res = 5,
  path = paste(REPO_HOME, "/data/GIS", sep = ""),
  download = T
)

# The location of the TIFF file with the stacked layers described directly above
files.names <- list.files(paste(REPO_HOME, "/data/GIS/5_deg", sep = ""))

# Turn the file names into layer names: strip the prefix (which might include
# the resolution) and strip the file extension
gis.layers.names <- files.names
gis.layers.names <- gsub('current_5arcmin_','',gis.layers.names)
gis.layers.names <- gsub('.tif','',gis.layers.names)

# Combine the layer names with those we've already read from BIOCLIM
gis.layers.names <- c(names(gis.layers),gis.layers.names)

# Iterate over files
for (i in 1:length(files.names)) {
  
  # Stack with previously read layers
  gis.layers <- stack(
    gis.layers,
    
    # Read as raster
    raster(
      
      # Construct file name
      paste(REPO_HOME, "/data/GIS/5_deg/", files.names[i], sep = "")
    )
  )
}

# Apply all names
names(gis.layers) <- gis.layers.names
plot(gis.layers)

Contribution of layers to maxent models

Populate a data.frame of taxa x layers and record the contribution of each of these layers:

traits.contributions.csv <- paste(REPO_HOME, "results/maxent/model_summaries/traits_contribution_maxent.csv", sep="")

 if (!file.exists(traits.contributions.csv)) {
# instantiate variable contributions frame from a matrix of taxa x layers
traits.contributions <- data.frame(matrix(
  nrow = length(taxa.names), 
  ncol = length(gis.layers.names), 
  dimnames = list(taxa.names,gis.layers.names))
)

# start iterating over taxa
for (i in 1: length(taxa.names)) {
  
  # load maxent model if it exists
  maxent.model <- get_maxent_model( REPO_HOME, taxa.names[i] )
  if (!is.null(maxent.model)) {
    
    # the plot function returns a named list with the variable contributions
    var.contrib <- plot(maxent.model)
      
    # iterate over layers, store variable contribution
    for ( j in 1:length(gis.layers.names) ) {
      if ( gis.layers.names[j] %in% colnames(maxent.model@presence) ) {
        traits.contributions[i,j] <- var.contrib[gis.layers.names[j]]
      }
    }
  }
}

write.csv(traits.contributions, traits.contributions.csv)
 }

traits.contributions <- read.csv(traits.contributions.csv)

Now let’s summarize the variable importance:

# make a box plot. as a side effect, the summary statistics are returned
traits.contributions.summary <- boxplot.default(traits.contributions[2:42], varwidth = T, outline = F, col = c("darkorange", "firebrick1", "goldenrod1"))

mean.traits.contributions.csv <- paste(REPO_HOME, "results/maxent/model_summaries/mean_traits_contribution_maxent.csv", sep="")

if (!file.exists(mean.traits.contributions.csv)) {
# we will populate a data frame with the returned summary statistics
traits.contributions.df <- data.frame(matrix(
  nrow = length(gis.layers.names), 
  ncol = 2, 
  dimnames = list(gis.layers.names,c("mean","n")))
)
for ( i in 1:length(traits.contributions.summary$names) ) {
  traits.contributions.df[i,1] <- traits.contributions.summary$stats[i*5-2]
  traits.contributions.df[i,2] <- traits.contributions.summary$n[i]
}

write.csv(traits.contributions.df, mean.traits.contributions.csv)
}

As a last step we summarize all models: AUC values, number of occurrence points and the descending order of variable importance per model.

output.csv<- paste(REPO_HOME, "/Results/maxent/summary_df.csv", sep="")  

if (!file.exists(mean.traits.contributions.csv)) {
  
# create empty dataframe with columns "Species_name", "AUC values, "n", "important_variables"
df.summary <- data.frame(matrix(ncol = 4, nrow = length(taxa.names)))
x <- c("species", "n", "AUC", "important_variables")
colnames(df.summary) <- x

# load AUC df
df.auc<- sprintf("%s/Results/maxent/model_summaries/AUCvalues.csv", REPO_HOME)
df.auc<- read.csv(df.auc, header = T)

# load trait contribution
df.traits<- sprintf("%s/Results/maxent/model_summaries/traits_contribution_maxent.csv", REPO_HOME)
df.traits <- read.csv(df.traits, header = T, row.names = 1)
y <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29","30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41")
colnames(df.traits) <- y

for (i in 1:length(taxa.names)) {
  # set species name
  df.summary[i,1] <- taxa.names[i]
  # count number of occurence points 
  csv.file <- sprintf("%s/data/filtered/%s.csv", REPO_HOME, taxa.names[i])
  csv <- read.csv(csv.file, header = T)
  df.summary[i,2] <- nrow(csv)
  # select the AUC value
  df.summary[i,3] <- df.auc[i,"trainingAUC"]
  # order the trait contribution
  traits.row <- df.traits[i,]
  traits.row<- na.omit(t(traits.row))
  traits.row<-traits.row[order(traits.row, decreasing = TRUE),] 
  traits.row<- as.data.frame(traits.row)
  traits.row<- rownames(traits.row)
  df.summary[i,4] <- paste(traits.row, collapse= "," )
  }

write.csv(df.summary, output.csv)
}