

list.of.packages <- c("deSolve", "plyr", "lubridate", "parallel", "tidyr", "doParallel", "foreach","randomForest","rpart",
                      "tidyverse", "gtools", "rgdal", "lsmeans", "lme4", "sf", "mlbench", "rgdal","rgeos")


list.of.packages <- c("ggplot2", "emmeans", "lme4", "lmerTest")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)



require(ggplot2)
require(emmeans)
require(lme4)
require(lmerTest)
require(tidyr)
require(plyr)
require(gridExtra)
require(lsmeans)
require(reshape2)
require(raster)
require(rgdal)

# source ODK data from wthin R

source("D://workspace//ACAI//ODK_functions.R")

wd <- "D:/workspace/ODK_briefcase_dld"
setwd(wd)

forms <- c("Measure_Potato_PO", "Assign_FDTLPO")
briefCaseDwnld(forms=forms, target=wd, source="ONA")

ds1 <- dropGroupNames(read.csv("Measure_Potato_PO.csv"))
ds2 <- dropGroupNames(read.csv("Measure_Potato_PO-plot.csv"))
ds <- mergeODKforms(ds1, ds2)
ds$treat <- sub("\\_\\(.*", "", ds$POID2_label)
ds$treat <- gsub("_rep1", "", ds$treat)
ds$treat <- gsub("_rep2", "", ds$treat)

dsY <- ds[grepl("tuberYield", ds$parameters),]
dsY$tuberY <- dsY$tubersFW / (dsY$plotLength * dsY$plotWidth) * 10
write.csv(dsY, "D:/workspace/Scaling AKILIMO/Scaling_AKILIMO_yield_data.csv")

###############################################################################
###############################################################################
############### source data from csv file ###############################
setwd("D:/ACAI_Wrapper/Rwanda")
dsY <- read.csv("Potato_yr2.csv")
summary(dsY$tuberY)
hist(dsY$tuberY)

hist(dsY[dsY$treat == "NPK6",]$tuberY)

# data exploration and cleaning
ggplot(dsY, aes(x=treat, y=tuberY)) + 
  geom_violin(draw_quantiles = c(0.25, 0.5, 0.75))+
  geom_jitter(height=0)+
  ylim(0, 75)



summary(dsY$plotLength)
summary(dsY$plotWidth)

dsY$plotLength <- ifelse(dsY$plotLength > 10, dsY$plotLength/10, ifelse(dsY$plotLength==5,4.8, dsY$plotLength)) #correct obvious errors with plot length
dsY$plotWidth <- ifelse(dsY$plotWidth > 10, dsY$plotWidth/10, ifelse(dsY$plotWidth==5,4.8, dsY$plotWidth)) #correct obvious errors with plot width
dsY$tuberY <- dsY$tubersFW / (dsY$plotLength * dsY$plotWidth) * 10

dsY$district <- ifelse(dsY$ENID=='SAENRW000001', 'Burera', ifelse(dsY$ENID=='SAENRW000011', 'Burera', ifelse(dsY$ENID=='SAENRW000003', 'Burera',ifelse(dsY$ENID=='SAENRW000012', 'Burera',ifelse(dsY$ENID=='SAENRW000002', 'Rutsiro', ifelse(dsY$ENID=='SAENRW000006', 'Rutsiro',ifelse(dsY$ENID=='SAENRW000008', 'Rutsiro',ifelse(dsY$ENID=='SAENRW000005', 'Rubavu',ifelse(dsY$ENID=='SAENRW000007', 'Rubavu', ifelse(dsY$ENID=='SAENRW000009', 'Rubavu', '')))))))))) #add districts based on enumerators IDs

dsY$plDens <- dsY$nrPlants / (dsY$plotLength * dsY$plotWidth) # calculate plant density
dsY$YperPlant <- dsY$tuberY / dsY$plDens*10 # calculate yield per plant
summary(dsY$plDens)
summary(dsY$YperPlant)
par(cex.lab=1.5) # is for y-axis
par(cex.axis=1.5) # is for x-axis
boxplot(dsY$plDens ~ FDID2, data=dsY, ylab="Plant density (m-2)", xlab='Field', xaxt="n")
boxplot(dsY$plDens ~ district, data=dsY, ylab="Plant density (m-2)")
boxplot(dsY$YperPlant ~ district, data=dsY, ylab="Tuber yield per plant (kg plant-1)")

par(mar=c(10, 5, 4, 2))
par(mfrow=c(2,2))
boxplot(tuberY ~ treat, data=dsY, ylab="Tuber yield FW (mg/ha)", main="All data", las=2)
boxplot(tuberY ~ treat, data=dsY, subset=dsY$district=='Burera', ylab="Tuber yield FW (mg/ha)", main="Burera", las=2)
boxplot(tuberY ~ treat, data=dsY, subset=dsY$district=='Rubavu', ylab="Tuber yield FW (mg/ha)", main="Rubavu", las=2)
boxplot(tuberY ~ treat, data=dsY, subset=dsY$district=='Rutsiro', ylab="Tuber yield FW (mg/ha)",main="Rutsiro", las=2)

dsY[dsY$district == "Burera" & dsY$tuberY > 55, ]
dsY[dsY$tuberY > 55, ]
plot(dsY$lon, dsY$lat)

plot(dsY$lon, dsY$lat, xlim=c(29.7, 30.0), ylim=c(-1.6, -1.48))

unique(dsY[dsY$lat > -1.51 & dsY$lat <  -1.50 & dsY$lon>29.7, ]$lon)
unique(dsY[dsY$lat > -1.51 & dsY$lat <  -1.50 & dsY$lon>29.81,]$lon)
unique(dsY[dsY$lat < -1.9 & dsY$lat > -1.95  & dsY$lon < 29.47,]$lon)


unique(dsY$cluster)
dsY$cluster <- ifelse(dsY$lat > -1.55 & dsY$lon<29.6, "clus_1", 
                      ifelse(dsY$lat > -1.75 & dsY$lon<29.35, "clus_2",
                             ifelse(dsY$lat > -1.629 & dsY$lat < -1.55& dsY$lon > 29.35 & dsY$lon < 29.45, "clus_3",
                                    ifelse(dsY$lat > -1.65 & dsY$lat < -1.629& dsY$lon > 29.35 & dsY$lon < 29.45, "clus_4",
                                           ifelse(dsY$lat > -1.75 & dsY$lat < -1.7& dsY$lon > 29.35 & dsY$lon < 29.45, "clus_5",
                                                  ifelse(dsY$lat > -1.81 & dsY$lat < -1.75&  dsY$lon < 29.4, "clus_6", 
                                                         ifelse(dsY$lat < -1.81& dsY$lat > -1.86 & dsY$lon > 29.35 & dsY$lon < 29.45, "clus_7",
                                                                ifelse(dsY$lat < -1.9 & dsY$lat > -1.95  & dsY$lon < 29.47, "clus_14",
                                                                       ifelse(dsY$lat < -1.9 & dsY$lat > -2  & dsY$lon > 29.47, "clus_15", 
                                                                              ifelse(dsY$lat > -1.50 & dsY$lon>29.8, "clus_8", 
                                                                                     ifelse(dsY$lat > -1.512 & dsY$lat <  -1.50 & dsY$lon>=29.79 & dsY$lon <= 29.85,"clus_11",
                                                                                            ifelse(dsY$lat < -1.512 & dsY$lat >  -1.525 & dsY$lon<29.85 & dsY$lon>29.8, "clus_12", 
                                                                                                   ifelse(dsY$lat >= -1.575 & dsY$lat < -1.525 & dsY$lon > 29.83, "clus_16",
                                                                                                          ifelse(dsY$lat < -2, "clus_13", "Unknown"))))))))))))))
                                                                                                   
                                                                       
                                                                
                                                                
                                                                                            
                                                                                     
                                                                              


ggplot(dsY, aes(lon, lat, col = factor(cluster)))+
  geom_point(size=3)+
  geom_point(data=dsY[dsY$cluster == 'Unknown', ]) +
  # xlim(29.7, 30.0)+ ylim(-1.6, -1.48) + 
  theme_bw()

dsYsel <- subset(dsY, dsY$district=='Rubavu'|dsY$district=='Rutsiro') #drop data from Burera (district with disease problems)
dsYsel$cluster <- as.factor(dsYsel$cluster)

ggplot(dsYsel, aes(cluster, tuberY, fill=factor(treat)))+
  geom_boxplot()+
  facet_wrap(~district)+
  theme_bw()

length(unique(dsYsel$FDID2))
length(unique(dsYsel$TLID2))

head(dsYsel)

dsYsel <- unique(dsYsel[, c("lat", "lon", "FDID2","treat", "tuberY", "district","cluster")])

dsnpk <- dsYsel[dsYsel$treat == "NPK11", ]
dsNnpk <- dsYsel[!dsYsel$treat == "NPK11", ]

dsnpkav <- ddply(dsnpk, .(lat,lon,FDID2,district, cluster), summarize, avNPK  = mean(tuberY ))

dsNnpk <- unique(merge(dsNnpk, dsnpkav, by=c("lat","lon","FDID2","district", "cluster")))
head(dsNnpk)
dsNnpk$Ydiff <- dsNnpk$avNPK - dsNnpk$tuberY

ggplot(dsNnpk, aes(avNPK, tuberY, fill=factor(cluster)))+
  geom_boxplot()+
  facet_wrap(~district)
  
ggplot(data=dsNnpk, aes(x=Ydiff, y=treat ,col = district)) +
  facet_grid(district ~ treat) +
   geom_point(size=1.2) 


# data analysis
mod1 <- lmer(tuberY ~ 1 + treat + (1|FDID2), data=dsYsel)
plot(mod1)
#note: transformation of data required... heteroskedastic residuals 

mod2 <- lmer(sqrt(tuberY) ~ treat + (1|FDID2), data=dsYsel)
plot(mod2) # no more heteroskedastici residuals
dsY <- dsY[which(abs(resid(mod2))<2),] 


lsm <- emmeans(mod2, ~treat, lmerTest.limit = 5000)
dif <- contrast(regrid(lsm, transform="response"), "trt.vs.ctrl", ref = 1, adjust="sidak")
dif <- contrast(regrid(lsm, transform="response"), "trt.vs.ctrl", ref = 5, adjust="sidak")

mod3 <- lmer(sqrt(tuberY) ~ treat + (0+treat|FDID2), data=dsYsel)
plot(mod3)
anova(mod2, mod3) #no significant random slope -> indication for no site-specific responses
summary(mod2)

mod4 <- lmer(sqrt(tuberY) ~ treat*district + (1|FDID2), data=dsYsel) #evaluate interaction between treatment and district
plot(mod4)
anova(mod2, mod3)

mod5 <- lmer(sqrt(tuberY) ~ treat*district + (0+treat|FDID2),data=dsYsel)
plot(mod5)
anova(mod4, mod5)


mod6 <- lmer(sqrt(tuberY) ~ treat*district + (0+treat|FDID2) + (1|cluster), data=dsYsel)
plot(mod6)
anova(mod5, mod6)

## get BLUPs

ref.grid(mod6)
lsm6 <- lsmeans(mod6, c("treat", "district"))
lsmDF <- as.data.frame(lsm6)
lsmDF <- lsmDF[,1:3]
names(lsmDF)[1] <- "treat"
names(lsmDF)[3] <- "lsmean"
row.names(lsmDF) <- NULL



ranFD <- ranef(mod6)$FDID2
ranCluster <- ranef(mod6)$cluster
names(ranCluster) <- "clusRanef"
ranFD$FD <- row.names(ranFD)
ranCluster$cluster <- row.names(ranCluster)
ranCluster<- unique(merge(ranCluster, unique(dsYsel[, c("district", "cluster", "FDID2")])))

names(ranFD) <- gsub("treat", "", names(ranFD))
ranFD <- melt(ranFD, 
             measure.vars=c("NPK11","NPK4_DAP2","NPK4_MOP2","NPK4_UREA2","NPK6"),
             variable.name="treat",
             id.vars="FD",
             value.name="blup")

head(ranFD)
rownames(ranFD) <- NULL
ranFD <- merge(ranFD, ranCluster, by.x="FD", by.y="FDID2")
ranFD$blup <- ranFD$blup + ranFD$clusRanef
ranFD <- ranFD[, c("district", "cluster", "FD",  "treat", "blup")]
head(ranFD)


fixRan <- merge(ranFD, lsmDF, by=c("treat", "district"))
fixRan$blup <- fixRan$blup + fixRan$lsmean
fixRan <- subset(fixRan, select=-c(lsmean))
head(fixRan)
fixRan$blup <- (fixRan$blup)^2
summary(fixRan$blup)
summary(dsYsel$tuberY)

fixRan <- unique(fixRan[, c("district","cluster","FD", "treat", "blup")])


fixRanWide <- spread(fixRan, treat, blup)
head(fixRanWide)
saveRDS(fixRanWide, "Potato_fixRanWide_21.RDS")

longData <- gather(fixRanWide, treat, value, NPK4_DAP2:NPK6)
head(longData)


longData$YieldDiff <- longData$NPK11 - longData$value
unique(longData$treat )
saveRDS(longData, "Potato_longData_21.RDS")


ggplot(data=longData, aes(value, y=YieldDiff, col = factor(treat))) +
  facet_grid(district~treat) +
  stat_density2d(geom="path") +
  geom_point(size=1.2) +
  geom_abline(intercept=0,slope=0, col="darkgrey") +
  xlab("") +
  ylab("Yield difference [t/ha]") +
  theme_bw() +
  theme(axis.text  = element_text(size=12), legend.position = "none",
        axis.title = element_text(size=15), strip.text = element_text(size=12), 
        legend.text =element_text(size=15), legend.title = element_blank())


##########################################################################################
############################## reverse QUEFTS#############################
fixRanWide <- readRDS("Potato_fixRanWide_21.RDS")
head(fixRanWide)


### change fresh matter to dry matter
fixRanWide$DM_NPK <- fixRanWide$NPK11 * 0.21 * 1000
fixRanWide$NPK4_DAP2_DM <- fixRanWide$NPK4_DAP2 * 0.21 * 1000
fixRanWide$NPK4_MOP2_DM <- fixRanWide$NPK4_MOP2 * 0.21 * 1000
fixRanWide$NPK4_UREA2_DM <- fixRanWide$NPK4_UREA2 * 0.21 * 1000
fixRanWide$NPK6_DM <- fixRanWide$NPK6 * 0.21 * 1000
head(fixRanWide)

#RecoveryFraction <- data.frame(rec_N=0.5, rec_P=0.21, rec_K=0.49)## cassava
RecoveryFraction <- data.frame(rec_N=0.41, rec_P=0.25, rec_K=0.65)## potato

## fertilizers added
#NPK171717:  0.17, P_cont = 0.083, K_cont=0.15
#DAP: N_cont = 0.18, P_cont = 0.20, K_cont=0.0
#MOP : N_cont = 0.00, P_cont = 0.00, K_cont=0.50

fert <- rbind(
data.frame(fertilizer = "NPK11", Ncont = 11*0.17, Pcont = 11*0.083, Kcont= 11 *0.15),
data.frame(fertilizer = "NPK4_DAP2", Ncont =(4*0.17 + (2*0.18)), Pcont = (4*0.083 + (2*0.2)), Kcont= 4*0.15),
data.frame(fertilizer = "NPK4_MOP2", Ncont =4*0.17, Pcont = 4*0.083, Kcont= (0.5*2)+4*0.15),
data.frame(fertilizer = "NPK4_UREA2", Ncont =(4*0.17 + (2*0.46)), Pcont = (4*0.083), Kcont= 4*0.15),
data.frame(fertilizer = "NPK6", Ncont = 6*0.17, Pcont = 6*0.083, Kcont= 6 *0.15))


fert <- data.frame(type = c( "NPK6", "NPK4_MOP2",  "NPK4_UREA2",  "NPK4_DAP2", "NPK11"),
                   N = c(51,34,80,52,94), P =c(22,15,15,35,41), K = c(42,78,28,28,78))
addedFertilizer <- fert[fert$type %in% c("NPK6", "NPK4_MOP2", "NPK4_UREA2","NPK4_DAP2"), c("N","P","K") ]
names(addedFertilizer) <- c("FN","FP","FK")

addedFertilizerX <- as.data.frame(matrix(nrow=4,ncol=3))
colnames(addedFertilizerX) <- c("FN", "FP", "FK")
addedFertilizerX[,1] <- c(0, 0, 150, 150)
addedFertilizerX[,2] <- c(0, 40, 0, 40)
addedFertilizerX[,3] <- c(0, 180, 180, 0)	



source("QUEFTS_function.R")

soilNPK <- NULL
validation_result <- NULL
for(i in 1:nrow(fixRanWide)){
  oneTrial <- fixRanWide[i,]	
  WLY <- oneTrial$DM_NPK # is in kg/ha dry root
  

  YieldMatrix <- as.data.frame(matrix(nrow=4,ncol=2))
  colnames(YieldMatrix) <- c("treatment", "NOT_yield")
  YieldMatrix[,1] <- c("Control", "PK", "NK", "NP")
  YieldMatrix[,2] <- c(oneTrial$NPK6_DM, oneTrial$NPK4_MOP2_DM, oneTrial$NPK4_UREA2_DM, oneTrial$NPK4_DAP2_DM )
  
  ## optimizing by min TSS. 
  soil_NPK <- optim(par=c(0,0,0), optim_INS, lower=c(0.1, 0.1, 0.1), method = "L-BFGS-B", addedFertilizer=addedFertilizer, YieldMatrix=YieldMatrix, WLY=WLY)$par	
  
 
    Yield_control <- Yield_S(SN = addedFertilizer[1,1] + soil_NPK[1] , SP = addedFertilizer[1,2] + soil_NPK[2], 
                             SK = addedFertilizer[1,3] + soil_NPK[3], WLY = WLY)
    oneTrial$Control_observed <- oneTrial$ NPK6_DM
    oneTrial$Control_estimated <-  Yield_control

  
  
  validation_result <- rbind(validation_result, oneTrial)
  
  snpk <- oneTrial
  snpk$soilN <- soil_NPK[1]
  snpk$soilP <- soil_NPK[2]
  snpk$soilK <- soil_NPK[3]
  soilNPK <- rbind(soilNPK, snpk)
}


potato_reverseQUEFTS <- ggplot(validation_result, aes(Control_observed, Control_estimated, col=factor(district)))+
  geom_point(size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_bw()
  
ggsave(potato_reverseQUEFTS, "Potato.ReverseQUEFTS.pdf")



soilNPK_GPS <- unique(merge(soilNPK, dsY[, c("FDID2", "lon", "lat")], by="FDID2", "FD"))
summary(soilNPK_GPS$soilN)
summary(soilNPK_GPS$soilP)
summary(soilNPK_GPS$soilK)


ggplot(soilNPK_GPS, aes(lat, lon)) +
  geom_tile(aes(fill = soilN))



library(plotly)

plot_ly(
  soilNPK_GPS, x= ~lon, y= ~lat, z= ~soilN,
  type='mesh3d', intensity = ~soilN,
  colors= colorRamp(rainbow(5))
)



###  get isrics data

setwd("/home/akilimo/projects/SoilNPK")
Potato_yr2_soilNPK_GPS <- readRDS("Potato_yr2_soilNPK_GPS.RDS")

setwd("/home/akilimo/lintul/lintul")
source("sourceISRIC.R")


get_ISIRIC_AOI <- function(AOI_GPS){
  ISRIC_AOI <- NULL
  for(i in 1:nrow(AOI_GPS)){
    onePoint <- getISRICData(lat = AOI_GPS$lat[i], lon = AOI_GPS$lon[i])
    ISRIC_AOI <- rbind(ISRIC_AOI, onePoint)
  }
  return(ISRIC_AOI)
}


AOI_GPS <- unique(Potato_yr2_soilNPK_GPS[, c("FD", "lon", "lat")])

POt_RW_ISRIC <- get_ISIRIC_AOI(AOI_GPS = AOI_GPS)




## get iSDA data for the trial locations
PlandCropCover <- readRDS("potato_isda_cover.RDS")
potato_isda <- readRDS("potato_isda.RDS")






PlandCropCover <- PlandCropCover[ , c("slope_angle", "fcc")]





GIS_soilINS_modData2 <- soilNPK[, c("soilN", "soilP", "soilK", "exchK", "P_Mehlich", "Clay_5","Clay_15","Clay_30","percentSOM_5","percentSOM_15","percentSOM_30",
                                    "pH_5","pH_15","pH_30", "silt_5", "silt_15", "silt_30","BD_5", "BD_15", "BD_30", "CEC_5","CEC_15","CEC_30", "percentSOC_5",
                                    "percentSOC_15", "percentSOC_30", "FC_5", "FC_15", "FC_30", "wp_5", "wp_15", "wp_30", "sws_5", "sws_15", "sws_30",
                                    "TotalN", "Mn","B", "Ca","Fe", "Cu","Al", "Mg", "Na","ncluster", "Country", "Control")]

  
  
  
  










