Load the required libraries:
library(dismo, quietly=T)
library(phytools, quietly = T)
library(phylolm, quietly = T)
library(dplyr, quietly = T)
source("../script/Data.R")
Initialize global variables:
REPO_HOME <- paste(getwd(),'/../',sep='')
Load the list of taxa:
taxa.names <- scan(paste(REPO_HOME, "/data/filtered/taxa.txt", sep = ""), sep = "\n", what = character())
Define the list of GIS layers, i.e. the traits:
gis.layers <- raster::getData(
"worldclim",
var = "bio",
res = 5,
path = paste(REPO_HOME, "/data/GIS", sep = ""),
download = T
)
# The location of the TIFF file with the stacked layers described directly above
files.names <- list.files(paste(REPO_HOME, "/data/GIS/5_deg", sep = ""))
# Turn the file names into layer names: strip the prefix (which might include
# the resolution) and strip the file extension
gis.layers.names <- files.names
gis.layers.names <- gsub('current_5arcmin_','',gis.layers.names)
gis.layers.names <- gsub('.tif','',gis.layers.names)
# Combine the layer names with those we've already read from BIOCLIM
gis.layers.names <- c(names(gis.layers),gis.layers.names)
# Iterate over files
for (i in 1:length(files.names)) {
# Stack with previously read layers
gis.layers <- stack(
gis.layers,
# Read as raster
raster(
# Construct file name
paste(REPO_HOME, "/data/GIS/5_deg/", files.names[i], sep = "")
)
)
}
# Apply all names
names(gis.layers) <- gis.layers.names
plot(gis.layers)
Populate a data.frame of taxa x layers and record the contribution of each of these layers:
traits.contributions.csv <- paste(REPO_HOME, "results/maxent/model_summaries/traits_contribution_maxent.csv", sep="")
if (!file.exists(traits.contributions.csv)) {
# instantiate variable contributions frame from a matrix of taxa x layers
traits.contributions <- data.frame(matrix(
nrow = length(taxa.names),
ncol = length(gis.layers.names),
dimnames = list(taxa.names,gis.layers.names))
)
# start iterating over taxa
for (i in 1: length(taxa.names)) {
# load maxent model if it exists
maxent.model <- get_maxent_model( REPO_HOME, taxa.names[i] )
if (!is.null(maxent.model)) {
# the plot function returns a named list with the variable contributions
var.contrib <- plot(maxent.model)
# iterate over layers, store variable contribution
for ( j in 1:length(gis.layers.names) ) {
if ( gis.layers.names[j] %in% colnames(maxent.model@presence) ) {
traits.contributions[i,j] <- var.contrib[gis.layers.names[j]]
}
}
}
}
write.csv(traits.contributions, traits.contributions.csv)
}
traits.contributions <- read.csv(traits.contributions.csv)
Now let’s summarize the variable importance:
# make a box plot. as a side effect, the summary statistics are returned
traits.contributions.summary <- boxplot.default(traits.contributions[2:42], varwidth = T, outline = F, col = c("darkorange", "firebrick1", "goldenrod1"))
mean.traits.contributions.csv <- paste(REPO_HOME, "results/maxent/model_summaries/mean_traits_contribution_maxent.csv", sep="")
if (!file.exists(mean.traits.contributions.csv)) {
# we will populate a data frame with the returned summary statistics
traits.contributions.df <- data.frame(matrix(
nrow = length(gis.layers.names),
ncol = 2,
dimnames = list(gis.layers.names,c("mean","n")))
)
for ( i in 1:length(traits.contributions.summary$names) ) {
traits.contributions.df[i,1] <- traits.contributions.summary$stats[i*5-2]
traits.contributions.df[i,2] <- traits.contributions.summary$n[i]
}
write.csv(traits.contributions.df, mean.traits.contributions.csv)
}
As a last step we summarize all models: AUC values, number of occurrence points and the descending order of variable importance per model.
output.csv<- paste(REPO_HOME, "/Results/maxent/summary_df.csv", sep="")
if (!file.exists(mean.traits.contributions.csv)) {
# create empty dataframe with columns "Species_name", "AUC values, "n", "important_variables"
df.summary <- data.frame(matrix(ncol = 4, nrow = length(taxa.names)))
x <- c("species", "n", "AUC", "important_variables")
colnames(df.summary) <- x
# load AUC df
df.auc<- sprintf("%s/Results/maxent/model_summaries/AUCvalues.csv", REPO_HOME)
df.auc<- read.csv(df.auc, header = T)
# load trait contribution
df.traits<- sprintf("%s/Results/maxent/model_summaries/traits_contribution_maxent.csv", REPO_HOME)
df.traits <- read.csv(df.traits, header = T, row.names = 1)
y <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29","30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41")
colnames(df.traits) <- y
for (i in 1:length(taxa.names)) {
# set species name
df.summary[i,1] <- taxa.names[i]
# count number of occurence points
csv.file <- sprintf("%s/data/filtered/%s.csv", REPO_HOME, taxa.names[i])
csv <- read.csv(csv.file, header = T)
df.summary[i,2] <- nrow(csv)
# select the AUC value
df.summary[i,3] <- df.auc[i,"trainingAUC"]
# order the trait contribution
traits.row <- df.traits[i,]
traits.row<- na.omit(t(traits.row))
traits.row<-traits.row[order(traits.row, decreasing = TRUE),]
traits.row<- as.data.frame(traits.row)
traits.row<- rownames(traits.row)
df.summary[i,4] <- paste(traits.row, collapse= "," )
}
write.csv(df.summary, output.csv)
}