



######################################################################################################################3
######################################################################################################################3
## Rwanda Potato
######################################################################################################################3
setwd("/home/akilimo/lintul/lintul/dataSources/rwanda")
RP <- read.csv("RwandaPotato.csv")
head(RP)
RP$location <- paste(RP$Longitude, RP$Latitude, sep="_")
## get soilGrids data

ggplot(RP, aes(NPK, All))+
  geom_point()+
  geom_abline(slope=1, intercept = 0)

ggplot(RP, aes(NPK, All))+
  geom_point()+
  geom_abline(slope=1, intercept = 0)



setwd("/home/akilimo/lintul/lintul")
source("sourceISRIC.R")
coordPoints <- unique(RP[, c("Longitude","Latitude")])
coordPoints <- coordPoints[!is.na(coordPoints$Longitude), ]
names(coordPoints) <- c("lon", "lat")
ISRICData <- NULL
for(i in 1:nrow(coordPoints)){
  isd <- getISRICData(lat = coordPoints$lat[i], lon = coordPoints$lon[i])
  ISRICData <- rbind(ISRICData, isd)
}

head(ISRICData)

setwd("/home/akilimo/projects/SoilNPK/Rwanda")
write.csv(ISRICData, "ISRICData_Potato_RW.csv", row.names = FALSE)

ISRICData <- read.csv("ISRICData_Potato_RW.csv")
ISRICData$location <- paste(ISRICData$long, ISRICData$lat, sep="_")


RP2 <- subset(RP, select=-c(Province, District,Sector,Farmes_name, SiteNr, Latitude, Longitude, Elevation, Control,
                            NPK, All, All_Reduced_N, All_reduced_P, All_reduced_K,  X, SOIL.SAMPLE.CODE))
names(RP2) <- paste("RP", names(RP2), sep="_")
head(RP2)

RP3 <- unique(merge(ISRICData, RP2,  by.x="location",by.y="RP_location" ))
head(RP3)



gg1 <- ggplot(RP3, aes(RP_B, B))+
  geom_point() +
  xlab("B data") + ylab("B ISRIC")+
  theme_bw()

gg2 <- ggplot(RP3, aes(RP_Org_C, percentSOC_averaged))+
  geom_point() +
  xlab("Org C data") + ylab("Org C ISRIC")+
  theme_bw()

gg3 <- ggplot(RP3, aes(RP_N_total, TotalN))+
  geom_point() +
  xlab("N total data") + ylab("N total ISRIC")+
  theme_bw()

gg4 <- ggplot(RP3, aes(RP_pH, pH_averaged))+
  geom_point() +
  xlab("pH data") + ylab("pH ISRIC")+
  theme_bw()

gg5 <- ggplot(RP3, aes(RP_Ca , Ca))+
  geom_point() +
  xlab("Ca data") + ylab("Ca ISRIC")+
  theme_bw()

gg6 <- ggplot(RP3, aes(RP_K , exchK))+
  geom_point() +
  xlab("K data") + ylab("K_exch ISRIC")+
  theme_bw()

gg7 <- ggplot(RP3, aes(RP_K , K_Mehlich))+
  geom_point() +
  xlab("K data") + ylab("K_Mehlich ISRIC")+
  theme_bw()

gg8 <- ggplot(RP3, aes(RP_Mg , Mg))+
  geom_point() +
  xlab("Mg data") + ylab("Mg ISRIC")+
  theme_bw()

gg9 <- ggplot(RP3, aes(RP_P , P_Mehlich))+
  geom_point() +
  xlab("P data") + ylab("P_Mehlich ISRIC")+
  theme_bw()

gg10 <- ggplot(RP3, aes(RP_Al , Al))+
  geom_point() +
  xlab("Al data") + ylab("Al ISRIC")+
  theme_bw()

gg11 <- ggplot(RP3, aes(RP_Cu , Cu))+
  geom_point() +
  xlab("Cu data") + ylab("Cu ISRIC")+
  theme_bw()

gg12 <- ggplot(RP3, aes(RP_Fe , Fe))+
  geom_point() +
  xlab("Fe data") + ylab("Fe ISRIC")+
  theme_bw()

gg13 <- ggplot(RP3, aes(RP_Mn , Mn))+
  geom_point() +
  xlab("Mn data") + ylab("Mn ISRIC")+
  theme_bw()

gg14 <- ggplot(RP3, aes(RP_Na , Na))+
  geom_point() +
  xlab("Na data") + ylab("Na ISRIC")+
  theme_bw()

gg15 <- ggplot(RP3, aes(RP_A_SAND_MI , Sand_averaged ))+
  geom_point() +
  xlab("Sand data") + ylab("Sand ISRIC")+
  theme_bw()

gg16 <- ggplot(RP3, aes(RP_A_SILT_MI , silt_averaged))+
  geom_point() +
  xlab("Silt data") + ylab("Silt ISRIC")+
  theme_bw()


library(ggplot2)
library(gridExtra)
grid.arrange(gg1, gg2, gg3,gg4,gg5,gg6,gg7,gg8,gg9,gg10,gg11,gg12,gg13,gg13,gg14,gg15,gg16, ncol=5, nrow =4)

ggsave("ISIRC_PD.pdf", grid.arrange(gg1, gg2, gg3,gg4,gg5,gg6,gg7,gg8,gg9,gg10,gg11,gg12,gg13,gg13,gg14,gg15,gg16, ncol = 5, nrow = 4),
       width=12, height = 10)



head(RP)
ds_NOT <- unique(RP[, c("Province", "District","Sector", "All", "All_Reduced_N", "All_reduced_P", "All_reduced_K", "Control")])
names(ds_NOT) <- c("Province", "District","Sector", "NPK", "PK", "NK","NP", "Control")
ds_NOT <- ds_NOT[!is.na(ds_NOT$NPK), ]
head(ds_NOT)
ds_NOT2 <- ds_NOT
longData <- gather(ds_NOT2, treat, value, PK:Control)
head(longData)




longData$YieldDiff <- longData$NPK - longData$value
longData$NutrientMissing <- ifelse(longData$treat == "NP", "K", ifelse(longData$treat == "NK", "P", 
                                                                       ifelse(longData$treat == "PK", "N", "NPK")))

longData$TreatDiff <- ifelse(longData$treat == "NP", "NPK - NP (K Eff.)", 
                             ifelse(longData$treat == "NK", "NPK - NK (P Eff.)", 
                                    ifelse(longData$treat == "PK", "NPK - PK (N Eff.)", "NPK - CON (NPK Eff.)")))

longData$NutrientMissing <- factor(longData$NutrientMissing, levels = c("N", "P", "K", "NPK"))
longData$TreatDiff <- factor(longData$TreatDiff, levels = c("NPK - CON (NPK Eff.)", "NPK - PK (N Eff.)", 
                                                            "NPK - NP (K Eff.)","NPK - NK (P Eff.)"))
head(longData)
gg3 <- ggplot(data=longData, aes(value, y=YieldDiff, col = Province)) +
  facet_grid(Province ~ TreatDiff) +
  stat_density2d() +
  geom_point(size=1.2) +
  geom_abline(intercept=0,slope=0, col="darkgrey") +
  xlab("") +
  ylab("Yield difference [t/ha]") +
  theme_bw() +
  theme(axis.text  = element_text(size=13), legend.position = "none",
        axis.title = element_text(size=15), strip.text = element_text(size=14), 
        legend.text =element_text(size=15), legend.title = element_blank())
gg3





require(tidyr)
ds_NOT <- gather(ds_NOT, Treatment, rootYield, NPK:Control)

ggplot(ds_NOT, aes(Treatment, rootYield, col=Province))+
  geom_point()+
  facet_wrap(~Province)
## model can be fitted by considering sector as trial. There is not reatment that is replicated to take siteNr as location

ds_NOT$DistrictSector <- as.factor(paste(ds_NOT$District, ds_NOT$Sector , sep="_"))

mod1 <- lmer(data=ds_NOT, rootYield ~ Treatment + (0+Treatment|Sector))
mod2 <- lmer(data=ds_NOT, rootYield ~ Treatment:DistrictSector + (0+Treatment|Sector))
anova(mod1, mod2)


summary(mod2)
ref.grid(mod2)
ref.grid(mod2, emm_options(pbkrtest.limit = 4648))
lsm3 <- lsmeans(mod2, c("Treatment", "DistrictSector"))
lsmDF3 <- as.data.frame(lsm3)
lsmDF3 <- lsmDF3[,1:3]
names(lsmDF3)[1] <- "treatment"
names(lsmDF3)[3] <- "lsmean"
row.names(lsmDF3) <- NULL


ran3 <- ranef(mod2)$Sector
ran3$Sector <- row.names(ran3)
names(ran3) <- gsub("Treatment", "", names(ran3))
ran3 <- melt(ran3, 
             measure.vars=c("NPK", "PK", "NK", "NP", "Control"),
             variable.name="treatment",
             id.vars="Sector",
             value.name="blup")
head(ran3)
head(ds_NOT)
ran3 <- unique(merge(ran3, unique(ds_NOT[, c("Sector", "Province", "District")]), by="Sector"))

head(ran3)
ggplot(ran3, aes(Sector, blup, col=treatment))+
  geom_point()+
  geom_abline(slope=0, intercept = 0)+
  facet_grid(treatment ~ Province)+
  theme_bw()+
  theme(axis.text.x = element_blank())

ran3$DistrictSector <- as.factor(paste(ran3$District, ran3$Sector, sep="_"))

ran3 <- merge(ran3, lsmDF3, by=c("treatment", "DistrictSector"))
ran3$blup <- ran3$blup + ran3$lsmean
ran3 <- subset(ran3, select=-c(lsmean))
head(ran3)

ran.w <- spread(ran3, treatment, blup)
head(ran.w)


saveRDS(ran.w, "RwandaPotato_Blups.RDS")

ran.w <- readRDS("RwandaPotato_Blups.RDS")

longData <- gather(ran.w, treat, value, PK:Control)
head(longData)


longData$YieldDiff <- longData$NPK - longData$value
longData$NutrientMissing <- ifelse(longData$treat == "NP", "K", ifelse(longData$treat == "NK", "P", 
                                                                       ifelse(longData$treat == "PK", "N", "NPK")))

longData$TreatDiff <- ifelse(longData$treat == "NP", "NPK - NP (K Eff.)", 
                             ifelse(longData$treat == "NK", "NPK - NK (P Eff.)", 
                                    ifelse(longData$treat == "PK", "NPK - PK (N Eff.)", "NPK - CON (NPK Eff.)")))
longData <- droplevels(longData[longData$YieldDiff <= 30, ])

longData$NutrientMissing <- factor(longData$NutrientMissing, levels = c("N", "P", "K", "NPK"))
longData$TreatDiff <- factor(longData$TreatDiff, levels = c("NPK - CON (NPK Eff.)", "NPK - PK (N Eff.)", 
                                                            "NPK - NP (K Eff.)","NPK - NK (P Eff.)"))
head(longData)
gg3 <- ggplot(data=longData, aes(value, y=YieldDiff, col = Province)) +
  facet_grid(Province ~ TreatDiff) +
  stat_density2d() +
  geom_point(size=1.2) +
  geom_abline(intercept=0,slope=0, col="darkgrey") +
  xlab("") +
  ylab("Yield difference [t/ha]") +
  theme_bw() +
  theme(axis.text  = element_text(size=13), legend.position = "none",
        axis.title = element_text(size=15), strip.text = element_text(size=14), 
        legend.text =element_text(size=15), legend.title = element_blank())
gg3


ggsave("RwandaPotato_BLUPS.png", gg3, width=12, height = 6)

### BLUE at SiteNr

ds_NOTlm <- unique(RP[, c("Province", "District","Sector", "SiteNr", "All", "All_Reduced_N", "All_reduced_P", "All_reduced_K", "Control")])
names(ds_NOTlm) <- c("Province", "District","Sector", "SiteNr","NPK", "PK", "NK","NP", "Control")
ds_NOTlm <- ds_NOTlm[!is.na(ds_NOTlm$NPK), ]
head(ds_NOTlm)
longData2 <- gather(ds_NOTlm, treat, value, PK:Control)
head(longData2)

ds_NOTlm2 <- droplevels(gather(ds_NOTlm, Treatment, rootYield, NPK:Control))
ds_NOTlm2$SiteNr <- as.factor(ds_NOTlm2$SiteNr)
ds_NOTlm2$Treatment <- as.factor(ds_NOTlm2$Treatment)


tt <- unique(ds_NOTlm2[, c("Treatment", "SiteNr", "District")])
table(tt$Treatment, tt$District)
table(tt$Treatment, tt$SiteNr)
table(tt$District, tt$SiteNr)
str(ds_NOTlm2)
modlm2 <- lm(data=ds_NOTlm2, rootYield ~ Treatment + SiteNr )
summary(modlm2)
ds_NOTlm2[ds_NOTlm2$District == "Rutsiro", ]


lm2 <- as.data.frame(coef(summary(modlm2)))
lm2$treat <- rownames(lm2)
lm2$treat <- gsub("Treatment", "", lm2$treat)
lm2$treat <- gsub("SiteNr", "", lm2$treat)
lm2$treat <- gsub("District", "", lm2$treat)
lm2$treat[1] <- "1"
rownames(lm2) <- NULL

lm3 <- lm2[1:37, ]
lm3 <- lm3[, c("treat", "Estimate")]
lm3$Estimate[2:37] <- lm3$Estimate[1] + lm3$Estimate[2:37]
colnames(lm3) <- c("SiteNr", "Estimatesite")

mm <- lm2[38:nrow(lm2),]

for(k in 1:nrow(mm)){
  mm$Treat[k] <- strsplit(mm$treat[k], ":")[[1]][1]
  mm$District[k] <- strsplit(mm$treat[k], ":")[[1]][2]
}

mm <- mm[, c("Treat", "District", "Estimate")]
mm <- merge(RP[, c("Province", "District","Sector", "SiteNr", "Latitude", "Longitude")], mm, by="District")
mm <- merge(mm, lm3, by="SiteNr")

mm$Estimate <- mm$Estimate + mm$Estimatesite
mm <- subset(mm, select=-c(Estimatesite))
wideD <- spread(mm, Treat, Estimate)
wideD <- wideD[, c("Province", "District", "Sector", "Latitude", "Longitude" ,"SiteNr", "NPK", "Control", "NK","NP","PK")]



##BLUE no interaction
modlm <- lm(data=ds_NOTlm2, rootYield ~ Treatment + SiteNr)
cc <- as.data.frame(coef(summary(modlm)))
cc$treat <- rownames(cc)
cc$treat <- gsub("Treatment", "", cc$treat)
cc$treat <- gsub("(Intercept)", "Control", cc$treat)
rownames(cc) <- NULL
tt <- cc[1:5, c(1,5)]
rr <- cc[6:nrow(cc), ]


tt$treat <- c("Control", "NK", "NP", "NPK", "PK")
tt$Estimate[c(2,3,4,5)] <- tt$Estimate[c(2,3,4,5)] + tt$Estimate[1]
tt$SiteNr<- "1"

rrr <- NULL
for(k in 1:nrow(rr)){
  ttt <- tt
  ttt$Estimate <-   ttt$Estimate + rr[k, "Estimate"]
  ttt$SiteNr <- as.factor(k+1)
  rrr <- rbind(rrr, ttt)
}


lmResult <- rbind(tt, rrr)

wideD <- spread(lmResult, treat, Estimate)
wideD <- wideD[, c("SiteNr", "NPK", "Control", "NK","NP","PK")]
wideD <- merge(RP[, c("Province", "District","Sector", "SiteNr", "Latitude", "Longitude")], wideD, by="SiteNr")


longData <- gather(wideD, treat, value, Control:PK)
head(longData)
longData$YieldDiff <- longData$NPK - longData$value
longData$NutrientMissing <- ifelse(longData$treat == "NP", "K", ifelse(longData$treat == "NK", "P", 
                                                                       ifelse(longData$treat == "PK", "N", "NPK")))

longData$TreatDiff <- ifelse(longData$treat == "NP", "NPK - NP (K Eff.)", 
                             ifelse(longData$treat == "NK", "NPK - NK (P Eff.)", 
                                    ifelse(longData$treat == "PK", "NPK - PK (N Eff.)", "NPK - CON (NPK Eff.)")))
longData <- droplevels(longData[longData$YieldDiff <= 30, ])

longData$NutrientMissing <- factor(longData$NutrientMissing, levels = c("N", "P", "K", "NPK"))
longData$TreatDiff <- factor(longData$TreatDiff, levels = c("NPK - CON (NPK Eff.)", "NPK - PK (N Eff.)", 
                                                            "NPK - NP (K Eff.)","NPK - NK (P Eff.)"))

wideD$DM_NPK <- wideD$NPK * 0.21 * 1000
wideD$DM_PK <- wideD$PK * 0.21 * 1000
wideD$DM_NK <- wideD$NK * 0.21 * 1000
wideD$DM_NP <- wideD$NP * 0.21 * 1000
wideD$DM_Control <- wideD$Control * 0.21 * 1000


head(wideD)
#RecoveryFraction <- data.frame(rec_N=0.5, rec_P=0.21, rec_K=0.49)## cassava
RecoveryFraction <- data.frame(rec_N=0.41, rec_P=0.25, rec_K=0.65)## potato


source("/home/akilimo/projects/SoilNPK/Rwanda/QUEFTS_function.R")

soilNPK <- NULL
validation_result <- NULL
for(i in 1:37){
  oneTrial <- wideD[i,]	
  # WLY <- oneTrial$NPK/3*1000 # convert to kg.ha
  WLY <- oneTrial$DM_NPK # is in kg/ha dry root
  
  addedFertilizer <- as.data.frame(matrix(nrow=4,ncol=3))
  colnames(addedFertilizer) <- c("FN", "FP", "FK")
  addedFertilizer[,1] <- c(0, 50, 75, 75)
  addedFertilizer[,2] <- c(0, 0.44*75, 0.44*50, 0.44*75)
  addedFertilizer[,3] <- c(0, 0.83*50, 0.83*50, 0.83*25)	
  
  
  YieldMatrix <- as.data.frame(matrix(nrow=4,ncol=2))
  colnames(YieldMatrix) <- c("treatment", "NOT_yield")
  YieldMatrix[,1] <- c("Control", "PK", "NK", "NP")
  YieldMatrix[,2] <- c(oneTrial$DM_Control, oneTrial$DM_PK, oneTrial$DM_NK, oneTrial$DM_NP)
  
  ## optimizing by min TSS. 
  soil_NPK <- optim(par=c(0,0,0), optim_INS, lower=c(0.1,0.1,0.1), method = "L-BFGS-B", addedFertilizer=addedFertilizer, YieldMatrix=YieldMatrix, WLY=WLY)$par	
  
  if(!is.na(oneTrial$Control)){
    Yield_control <- Yield_S(SN = addedFertilizer[1,1] + soil_NPK[1] , SP = addedFertilizer[1,2] + soil_NPK[2], 
                             SK = addedFertilizer[1,3] + soil_NPK[3], WLY = WLY)
    oneTrial$Control_observed <- oneTrial$DM_CON
    oneTrial$Control_estimated <-  Yield_control
  }
  
  
  validation_result <- rbind(validation_result, oneTrial)
  
  snpk <- oneTrial
  snpk$soilN <- soil_NPK[1]
  snpk$soilP <- soil_NPK[2]
  snpk$soilK <- soil_NPK[3]
  soilNPK <- rbind(soilNPK, snpk)
}

par(mfrow=c(2,3))
hist(soilNPK$soilN)
hist(soilNPK$soilP)
hist(soilNPK$soilK)
plot(soilNPK$soilN, soilNPK$soilP)
plot(soilNPK$soilN, soilNPK$soilK)
plot(soilNPK$soilK, soilNPK$soilP)

gg <- ggplot(validation_result, aes( DM_Control, Control_estimated,col=District))+
  geom_point()+ 
  geom_abline(intercept=0, slope=1)+
  xlim(1000, 7000) + ylim(1000,7000)+
  #geom_smooth(method=lm)+
  #stat_density2d()+ 
  xlab("Observed Control (BLUPs)") + ylab(" Control estimated through QUEFTS")+
  theme_bw()

ggsave("Raw_QUEFTS_Potato.png", gg, width=6, height=6)

validation_result[validation_result$Control_estimated > validation_result$DM_Control & validation_result$Control_estimated > 4000, ]
validation_result[validation_result$District == "Musanze", ]
validation_result[validation_result$District == "Karongi", ]
validation_result[validation_result$District == "Rutsiro", ]

gg2 <- ggplot(validation_result, aes(District, Control, fill=District))+
  geom_boxplot()
ggsave("BLUE_control_Potato.png", gg2, width=6, height=6)

ggplot(validation_result, aes(District, DM_NPK, fill=District))+
  geom_boxplot()




### since model is fitted at sector level not at site, we ned to have GPS at sector level

SectorD <- ddply(RP, .( Province, District, Sector), summarise, Latitude = mean(Latitude), Longitude = mean(Longitude))

head(ran.w)
dd <- merge(ran.w, SectorD, by=c("Province", "District", "Sector"))

dd$DM_NPK <- dd$NPK * 0.21 * 1000
dd$DM_PK <- dd$PK * 0.21 * 1000
dd$DM_NK <- dd$NK * 0.21 * 1000
dd$DM_NP <- dd$NP * 0.21 * 1000
dd$DM_Control <- dd$Control * 0.21 * 1000


0.44 * 75; 0.44*50
0.83 * 50; 0.83*25



head(dd)
#RecoveryFraction <- data.frame(rec_N=0.5, rec_P=0.21, rec_K=0.49)## cassava
RecoveryFraction <- data.frame(rec_N=0.41, rec_P=0.25, rec_K=0.65)## potato



source("QUEFTS_function.R")

soilNPK <- NULL
validation_result <- NULL
for(i in 1:nrow(dd)){
  oneTrial <- dd[i,]	
  # WLY <- oneTrial$NPK/3*1000 # convert to kg.ha
  WLY <- oneTrial$DM_NPK # is in kg/ha dry root
  
  addedFertilizer <- as.data.frame(matrix(nrow=4,ncol=3))
  colnames(addedFertilizer) <- c("FN", "FP", "FK")
  addedFertilizer[,1] <- c(0, 50, 75, 75)
  addedFertilizer[,2] <- c(0, 0.44*75, 0.44*50, 0.44*75)
  addedFertilizer[,3] <- c(0, 0.83*50, 0.83*50, 0.83*25)	
  
  
  YieldMatrix <- as.data.frame(matrix(nrow=4,ncol=2))
  colnames(YieldMatrix) <- c("treatment", "NOT_yield")
  YieldMatrix[,1] <- c("Control", "PK", "NK", "NP")
  YieldMatrix[,2] <- c(oneTrial$DM_Control, oneTrial$DM_PK, oneTrial$DM_NK, oneTrial$DM_NP)
  
  ## optimizing by min TSS. 
  soil_NPK <- optim(par=c(0,0,0), optim_INS, lower=c(0.1,0.1,0.1), method = "L-BFGS-B", addedFertilizer=addedFertilizer, YieldMatrix=YieldMatrix, WLY=WLY)$par	
  
  if(!is.na(oneTrial$Control)){
    Yield_control <- Yield_S(SN = addedFertilizer[1,1] + soil_NPK[1] , SP = addedFertilizer[1,2] + soil_NPK[2], 
                             SK = addedFertilizer[1,3] + soil_NPK[3], WLY = WLY)
    oneTrial$Control_observed <- oneTrial$DM_CON
    oneTrial$Control_estimated <-  Yield_control
  }
  
  
  validation_result <- rbind(validation_result, oneTrial)
  
  snpk <- oneTrial
  snpk$soilN <- soil_NPK[1]
  snpk$soilP <- soil_NPK[2]
  snpk$soilK <- soil_NPK[3]
  soilNPK <- rbind(soilNPK, snpk)
}


require(ggplot2)
require(tidyr)

saveRDS(validation_result, "validation_result_potato.RDS")
saveRDS(soilNPK, "soilNPK_potato.rds")

validation_result <- readRDS("validation_result_potato.RDS")
soilNPK <- readRDS("soilNPK_potato.rds")

gg <- ggplot(validation_result, aes( DM_Control, Control_estimated,))+
  geom_point()+ 
  geom_abline(intercept=0, slope=1)+
  xlim(1000, 7000) + ylim(1000,7000)+
  #geom_smooth(method=lm)+
  #stat_density2d()+ 
  xlab("Observed Control (BLUPs)") + ylab(" Control estimated through QUEFTS")+
  theme_bw()


ggsave("Potato_BLUP_QUEFTS.png", gg, width=6, height=6)

soilNPK <- readRDS("soilNPK_potato.rds")
soilNPK$soil_N <- round(soilNPK$soilN/5)*5
soilNPK$soil_P <- round(soilNPK$soilP/5)*5
soilNPK$soil_K <- round(soilNPK$soilK/5)*5

soilNPK <- soilNPK[order(soilNPK$soil_N),]

soilNPK$DistrictSector <- factor(soilNPK$DistrictSector, levels = soilNPK$DistrictSector[order(soilNPK$DistrictSector)])

ggplot(soilNPK, aes(DistrictSector, soil_N, col=as.factor(DistrictSector)))+
  geom_point(size=2)+
  theme_bw()+
  theme(axis.text.x = element_text(angle=90))

ggplot(soilNPK, aes(Latitude, Longitude, col=as.factor(soil_P)))+
  geom_point()

ggplot(soilNPK, aes(Latitude, Longitude, col=as.factor(soil_K)))+
  geom_point()


# setwd('/home/akilimo/lintul/lintul/dataSources/rwanda')
setwd("/nfs/extra_storage/gisdata/countryshapefiles")
Region1 <- readOGR(dsn=getwd(), layer="gadm36_RWA_1")
Region2 <- readOGR(dsn=getwd(), layer="gadm36_RWA_2")
Region3 <- readOGR(dsn=getwd(), layer="gadm36_RWA_3")
plot(Region2)

AOIMapS <- subset(Region2, NAME_2 %in% unique(soilNPK$District ) ) 
plot(Region1)
plot(AOIMapS, add=TRUE, col="red")


## row data
wideD <- RP[, c("Province", "District", "Sector", "Latitude", "Longitude" ,"SiteNr", "All", "Control", "All_reduced_P","All_reduced_K","All_Reduced_N")]
colnames(wideD) <- c("Province", "District", "Sector", "Latitude", "Longitude" ,"SiteNr", "NPK", "Control", "NK","NP","PK")
longData <- gather(wideD, treat, value, Control:PK)
head(longData)
longData$YieldDiff <- longData$NPK - longData$value
longData$NutrientMissing <- ifelse(longData$treat == "NP", "K", ifelse(longData$treat == "NK", "P", 
                                                                       ifelse(longData$treat == "PK", "N", "NPK")))

longData$TreatDiff <- ifelse(longData$treat == "NP", "NPK - NP (K Eff.)", 
                             ifelse(longData$treat == "NK", "NPK - NK (P Eff.)", 
                                    ifelse(longData$treat == "PK", "NPK - PK (N Eff.)", "NPK - CON (NPK Eff.)")))
longData <- droplevels(longData[longData$YieldDiff <= 30, ])

longData$NutrientMissing <- factor(longData$NutrientMissing, levels = c("N", "P", "K", "NPK"))
longData$TreatDiff <- factor(longData$TreatDiff, levels = c("NPK - CON (NPK Eff.)", "NPK - PK (N Eff.)", 
                                                            "NPK - NP (K Eff.)","NPK - NK (P Eff.)"))

wideD$DM_NPK <- wideD$NPK * 0.21 * 1000
wideD$DM_PK <- wideD$PK * 0.21 * 1000
wideD$DM_NK <- wideD$NK * 0.21 * 1000
wideD$DM_NP <- wideD$NP * 0.21 * 1000
wideD$DM_Control <- wideD$Control * 0.21 * 1000

head(wideD)
RecoveryFraction <- data.frame(rec_N=0.41, rec_P=0.25, rec_K=0.65)## potato


source("QUEFTS_function.R")

soilNPK <- NULL
validation_result <- NULL
for(i in 1:37){
  oneTrial <- wideD[i,]	
  # WLY <- oneTrial$NPK/3*1000 # convert to kg.ha
  WLY <- oneTrial$DM_NPK # is in kg/ha dry root
  
  addedFertilizer <- as.data.frame(matrix(nrow=4,ncol=3))
  colnames(addedFertilizer) <- c("FN", "FP", "FK")
  addedFertilizer[,1] <- c(0, 50, 75, 75)
  addedFertilizer[,2] <- c(0, 0.44*75, 0.44*50, 0.44*75)
  addedFertilizer[,3] <- c(0, 0.83*50, 0.83*50, 0.83*25)	
  
  
  YieldMatrix <- as.data.frame(matrix(nrow=4,ncol=2))
  colnames(YieldMatrix) <- c("treatment", "NOT_yield")
  YieldMatrix[,1] <- c("Control", "PK", "NK", "NP")
  YieldMatrix[,2] <- c(oneTrial$DM_Control, oneTrial$DM_PK, oneTrial$DM_NK, oneTrial$DM_NP)
  
  ## optimizing by min TSS. 
  soil_NPK <- optim(par=c(0,0,0), optim_INS, lower=c(0.1,0.1,0.1), method = "L-BFGS-B", addedFertilizer=addedFertilizer, YieldMatrix=YieldMatrix, WLY=WLY)$par	
  
  if(!is.na(oneTrial$Control)){
    Yield_control <- Yield_S(SN = addedFertilizer[1,1] + soil_NPK[1] , SP = addedFertilizer[1,2] + soil_NPK[2], 
                             SK = addedFertilizer[1,3] + soil_NPK[3], WLY = WLY)
    oneTrial$Control_observed <- oneTrial$DM_CON
    oneTrial$Control_estimated <-  Yield_control
  }
  
  
  validation_result <- rbind(validation_result, oneTrial)
  
  snpk <- oneTrial
  snpk$soilN <- soil_NPK[1]
  snpk$soilP <- soil_NPK[2]
  snpk$soilK <- soil_NPK[3]
  soilNPK <- rbind(soilNPK, snpk)
}

setwd("/home/akilimo/projects/SoilNPK/Rwanda")
write.csv(soilNPK, "soilNPK_Potato.csv", row.names = FALSE)
soilNPK <- read.csv("soilNPK_Potato.csv")

par(mfrow=c(2,3))
hist(soilNPK$soilN)
hist(soilNPK$soilP)
hist(soilNPK$soilK)
plot(soilNPK$soilN, soilNPK$soilP)
plot(soilNPK$soilN, soilNPK$soilK)
plot(soilNPK$soilK, soilNPK$soilP)



### get TY with 6 fertilizers and 27 soil NPK classes ( three values per NPK)
QN <- quantile(soilNPK$soilN, probs=seq(0, 1, 0.25))
QP <- quantile(soilNPK$soilP, probs=seq(0, 1, 0.25))
QK <- quantile(soilNPK$soilK, probs=seq(0, 1, 0.25))

soilN <- QN[2:4]
soilP <- QP[2:4]
soilK <- QK[2:4]

QN <- QN[2:4]
QP <- QP[2:4]
QK <- QK[2:4]


soilNPK_tt <- as.data.frame(expand.grid(soilN, soilP, soilK))
colnames(soilNPK_tt) <- c("soilN", "soilP", "soilK")
soilNPK_tt$Ncond <- ifelse(soilNPK_tt$soilN == QN[1], "lowN", ifelse(soilNPK_tt$soilN == QN[2], "medN", "highN"))
soilNPK_tt$Pcond <- ifelse(soilNPK_tt$soilP == QP[1], "lowP", ifelse(soilNPK_tt$soilP == QP[2], "medP", "highP"))
soilNPK_tt$Kcond <- ifelse(soilNPK_tt$soilK == QK[1], "lowK", ifelse(soilNPK_tt$soilK == QK[2], "medK", "highK"))
soilNPK_tt$soilNPKQ <- paste(soilNPK_tt$Ncond, soilNPK_tt$Pcond, soilNPK_tt$Kcond, sep="_")
soilNPK_tt$SS <- paste(round(as.numeric(soilNPK_tt$soilN, digits=0)), round(as.numeric(soilNPK_tt$soilP, digits=0)), round(as.numeric(soilNPK_tt$soilK, digits=0)), sep="_")


soilNPK_P <- as.data.frame(expand.grid(soilN, soilP, soilK))
colnames(soilNPK_P) <- c("soilN", "soilP", "soilK")

soilNPK_P$rec_N <- 0.5
soilNPK_P$rec_P <- 0.15
soilNPK_P$rec_K <- 0.5
soilNPK_P$rel_N <- 1
soilNPK_P$rel_P <- soilNPK_P$soilP / soilNPK_P$soilN
soilNPK_P$rel_K <- soilNPK_P$soilK / soilNPK_P$soilN






## WLY 
quantile(wideD$DM_NPK, probs=seq(0,1, 0.05), na.rm=TRUE)
WLY = 9000

### first season this computation is made for 300 USD. To show AKILIMO could do for flexible budget, we need to run this for 
### the price of blanket recommendation (6 bags of NPK 17) whihc cost 213$ = 213000 RWF
##InvestHa <- 190406 ## 200 USD
InvestHa <- 213000 ## 100 USD = 95200;  300 USD = 285600
rootUP <- 155

areaHa <- 1 # one ha
areaUnits <- "ha"
Planting_Harvest <- data.frame(st=seq(1, 365, 7), en = (seq(1, 365, 7) + 454), weekNr = seq(1:53))



### NPK from fertilizes
fertNPK <- rbind(
  data.frame(Nfert = 50, Pfert = 25, Kfert = 45),  ## blanket
  data.frame(Nfert = 100, Pfert = 50, Kfert = 90), ## max by doubling
  data.frame(Nfert = 100, Pfert = 25, Kfert = 45), ## 2N
  data.frame(Nfert = 50, Pfert = 50, Kfert = 45),  ## N, 2P K
  data.frame(Nfert = 50, Pfert = 25, Kfert = 90))  ## N, P 2K


## fertilizers
fertilizers_Rw <- data.frame(type = c("NPK17_17_17", "urea", "DAP", "15N25CaoB", "NPK15_9_20", "NPK23_10_5"),
                             N_cont = c(0.17, 0.46, 0.18, 0.15, 0.15, 0.23),
                             P_cont = c(0.075, 0.0, 0.2, 0, 0.04, 0.044), 
                             K_cont = c(0.14, 0, 0, 0, 0.166, 0.042),
                             price = c(615, 462, 511, 615, 615, 511))


source("/home/akilimo/projects/SoilNPK/Rwanda/QUEFTS_function.R")


## get CY
WLYCY <- NULL
for(k in 1:nrow(soilNPK_P)){
  WLYCY <- rbind(WLYCY, QUEFTS_WLYCY_Potato(SoilData=soilNPK_P[k, ], WLY=WLY))
}


## QUEFTS fert recom optimize the fertilizer recommendation for maxInv in local currency and provide expected target yield in kg
require(limSolve)
require(lpSolve)
require(lpSolveAPI)

getoptimzedRecom <- function (fertilizers_Rw, soilwlycy, rootUP, InvestHa){
  fertilizer <- fertilizers_Rw
  initial <- rep(0, nrow(fertilizer))
  lowerST <- rep(0.0, nrow(fertilizer))
  DCY <- soilwlycy$Current_Yield
  CY_user <- DCY/0.21
  WLY_user <- soilwlycy$water_limited_yield /0.21
  if(CY_user == WLY_user){
    snpk <- unique(soilwlycy[, c("soilN","soilP","soilK")])
    snpk$N <- snpk$P <- snpk$K <- 0
    snpk$WLY <- snpk$CurrentY <-snpk$TargetY <- WLY_user
    snpk$TC <- snpk$NR <- 0
    fertilizer$FR <- 0
    Recomfr_wide <- spread(fertilizer[, c('type', 'FR')], type, FR)
    
    return(cbind(snpk, Recomfr_wide))
  }else{
    FR <- optim(par=initial, fn=optim_NR_potato, lower=lowerST, method = "L-BFGS-B", control=list(fnscale=-1), rootUP=rootUP, 
                QID=soilwlycy, CY=DCY, fertilizer=fertilizer, invest=InvestHa)$par
    
    if(all(FR == 0)){
      return(data.frame(lat=lat, lon=lon, plDate, N=0, P=0, K= 0,WLY=WLY_user, CurrentY = CY_user, TargetY = CY_user,  TC =0, NR=0))
    }else{
      
      fertilizer$FR <- FR	
      
      ## NPK rate for ha of land	
      N <- as.vector(FR %*% fertilizer$N_cont)
      P <- as.vector(FR %*% fertilizer$P_cont)
      K <- as.vector(FR %*% fertilizer$K_cont)
      rec <- c(N, P, K)	
      
      ## NPK rate for user land size
      NPK_user <- rec
      
      ## TY for ha of land 	
      TY <- QUEFTS1_Pedotransfer_potato(QID=soilwlycy, rec)	# Yield possible at recommended NPK in kg/ha dry wt. 
      
      ## both CY and TY should be changed to user land size in ton/ha and fresh wt
      TY_user <-  TY/0.21
      
      ## total cost per ha
      TC <- (sum(FR * fertilizer$price))
      
      ## net revenue on the users land size
      GR <- (TY_user - CY_user) * rootUP                      # Gross revenue given root up is for fresh wt ton/ha
      NR <- round(GR - TC, digits=0)    											# Net Revenue
      
      ## reporting the recommended fertilizers
      # Recomfr <- fertilizer[fertilizer$FR > 0, ]
      Recomfr_wide <- spread(fertilizer[, c('type', 'FR')], type, FR)
      
      
      
      d1 <- data.frame(N=NPK_user[1], P=NPK_user[2], K= NPK_user[3],
                       WLY=WLY_user, CurrentY = CY_user, TargetY = TY_user,  TC =TC, NR=NR)
      d2 <- cbind(data.frame(soilN = soilwlycy$soilN, soilP = soilwlycy$soilP, soilK = soilwlycy$soilK), d1, Recomfr_wide)
      d2$NR <- ((d2$TargetY - d2$CurrentY)*rootUP) - d2$TC
      if(d2$TargetY < d2$CurrentY){
        d1$TargetY <- d1$CurrentY
        d1$NR <- d1$TC <- d1$N <- d1$P <- d1$K <- 0
        fertilizer$FR <- 0
        Recomfr_wide <- spread(fertilizer[, c('type', 'FR')], type, FR)
        d2 <- cbind(data.frame(soilN = soilwlycy$soilN, soilP = soilwlycy$soilP, soilK = soilwlycy$soilK), d1, Recomfr_wide)
      }
      return(d2)
    }
  }
}


check25_Potato <- function(soilwlycy, PotatoRecomL, fertilizers_Rw){
  QID=soilwlycy
  fert_optim <- PotatoRecomL
  WLY <- QID$water_limited_yield
  DCY <- QID$Current_Yield
  
  onlyFert <- subset(fert_optim, select = -c(N, P, K, WLY, CurrentY, TargetY, TC, NR, soilN,soilP,soilK))
  fertinfo <- subset(fert_optim, select = c(N, P, K, WLY, CurrentY, TargetY, TC, NR, soilN,soilP,soilK))
  
  if(fertinfo$NR <= 0){ ## all fertilizer recom == 0
    fertinfo$NR <- 0
    fertinfo$TC <- 0
    fertinfo$TargetY <- fertinfo$CurrentY
    fertinfo$N <- fertinfo$P <- fertinfo$K <- 0
    onlyFert[,1:ncol(onlyFert)] <- 0
    onlyFertQ <- keepRows(fertilizers_Rw, onlyFert)
    
    return(cbind(fertinfo, onlyFertQ))
  }else{
    RecomperHa2 <- gather(onlyFert, type, amount)
    onlyFert2 <- droplevels(RecomperHa2[RecomperHa2$amount > 25, ])
    
    if(nrow(onlyFert2) == 0 ){ ## if all fertilizer recom < 25 kg/ha all will be set to 0
      fertinfo$NR <- 0
      fertinfo$TC <- 0
      fertinfo$TargetY <- fertinfo$CurrentY
      fertinfo$N <- fertinfo$P <- fertinfo$K <- 0
      onlyFert[,1:ncol(onlyFert)] <- 0
      onlyFertQ <- keepRows(fertilizers_Rw, onlyFert)
      return(cbind(fertinfo, onlyFertQ))
    }else if (ncol(onlyFert) == nrow(onlyFert2)){ ## if all fertilizer recom are >= 25 kg/ha they will be kept and only checked for NR >= 18% of invest
      onlyFertQ <- keepRows(fertilizers_Rw, onlyFert)
      Reset_fert_Cont <-  cbind(fertinfo, onlyFertQ)
      GPS_fertRecom <- NRabove18Cost_potato(ds=Reset_fert_Cont)
      return(cbind(GPS_fertRecom))
    }else{
      fert25 <- spread(onlyFert2, type, amount) ## when some fertilizer recom are dropped b/c < 25 kg/ha, ty and NR should be recalculated
      fert_optim2 <- cbind(fertinfo, fert25)	
      fertilizer <- fertilizers_Rw[fertilizers_Rw$type %in% onlyFert2$type, ]
      Reset_fert_Cont <- Rerun_25kgKa_potato(rootUP=rootUP, rdd=fert_optim2, fertilizer=fertilizer, QID=QID, onlyFert=onlyFert2)
      
      
      if(Reset_fert_Cont$NR <= 0){ ## after rerunning after avoiding <25KG/ha fertilizers, if NR <=0
        fertinfo$NR <- 0
        fertinfo$TC <- 0
        fertinfo$TargetY <- fertinfo$CurrentY
        fertinfo$N <- fertinfo$P <- fertinfo$K <- 0
        onlyFert[,1:ncol(onlyFert)] <- 0
        onlyFertQ <- keepRows(fertilizers_Rw, onlyFert)
        return(cbind(fertinfo, onlyFertQ))
      }else{
        GPS_fertRecom <- NRabove18Cost_potato(ds=Reset_fert_Cont)
        onlyFert <- subset(GPS_fertRecom, select = -c(N, P, K, WLY, CurrentY, TargetY, TC, NR, soilN,soilP,soilK))
        fertinfo <- subset(GPS_fertRecom, select = c(N, P, K, WLY, CurrentY, TargetY, TC, NR, soilN,soilP,soilK))
        onlyFertQ <- keepRows(fertilizers_Rw, onlyFert)
        return(cbind(fertinfo, onlyFertQ))
      }
    }
  }
}

### sesaon 1
PotatoRecom <- NULL
for(j in 1:nrow(WLYCY)){
  PotatoRecom <- rbind(PotatoRecom,  getoptimzedRecom(fertilizers_Rw = fertilizers_Rw, soilwlycy=WLYCY[j, ], rootUP=rootUP, InvestHa=InvestHa))
}


str(PotatoRecom)
head(PotatoRecom)


PotatoRecom_adj <- NULL
for(j in 1:nrow(WLYCY)){
  print(j)
  PotatoRecom_adj <- rbind(PotatoRecom_adj, check25_Potato(soilwlycy=WLYCY[j, ], PotatoRecomL = PotatoRecom[j, ], fertilizers_Rw=fertilizers_Rw) )
}

head(PotatoRecom_adj)

PotatoRecom_adj$SS <- paste(round(PotatoRecom_adj$soilN, digits=0),round(PotatoRecom_adj$soilP, digits=0), round(PotatoRecom_adj$soilK, digits=0), sep="_")

PotatoRecom_X <- merge(PotatoRecom_adj, soilNPK_tt[, c("soilNPKQ", "SS")], by="SS")
PotatoRecom_X <- subset(PotatoRecom_X, select=-c(SS))
head(PotatoRecom_X)
names(PotatoRecom_X)[13] <- "N15CaO25B"


par(mfrow=c(2,3))
hist(PotatoRecom_X$NPK23_10_5 )
hist(PotatoRecom_X$NPK15_9_20)
hist(PotatoRecom_X$DAP)
hist(PotatoRecom_X$NPK17_17_17)
hist(PotatoRecom_X$urea)
hist(PotatoRecom_X$N15CaO25B)
PotatoRecom_X[PotatoRecom_X$N15CaO25B == 0 & PotatoRecom_X$NPK15_9_20 == 0 & PotatoRecom_X$NPK17_17_17 == 0 & PotatoRecom_X$NPK23_10_5 == 0 &
                PotatoRecom_X$DAP == 0 & PotatoRecom_X$urea == 0, ]
PotatoRecom_X[PotatoRecom_X$N15CaO25B == 0 , ]
PotatoRecom_X[PotatoRecom_X$NPK15_9_20 == 0 , ]
PotatoRecom_X[PotatoRecom_X$NPK17_17_17 == 0 , ]
PotatoRecom_X[PotatoRecom_X$NPK23_10_5 == 0 , ]
PotatoRecom_X[PotatoRecom_X$DAP == 0, ]
PotatoRecom_X[PotatoRecom_X$urea == 0, ]
PotatoRecom_X[PotatoRecom_X$soilN >100, ]
write.csv(PotatoRecom_X, "PotatoRecom.csv", row.names = FALSE)



#### season 2

PotatoRecom_X <- read.csv("PotatoRecom.csv") ### Season 1 advice with 300 USD

wlydataUsedSeason1 <- PotatoRecom_X[, c("soilN", "soilP", "soilK", "WLY", "CurrentY")]
wlydataUsedSeason1$rec_N <- 0.5
wlydataUsedSeason1$rec_P <- 0.15
wlydataUsedSeason1$rec_K <- 0.5
wlydataUsedSeason1$rel_N <- 1
wlydataUsedSeason1$rel_P <- wlydataUsedSeason1$soilP / wlydataUsedSeason1$soilN
wlydataUsedSeason1$rel_K <- wlydataUsedSeason1$soilK / wlydataUsedSeason1$soilN
wlydataUsedSeason1 <- wlydataUsedSeason1[, c("soilN", "soilP", "soilK", "rec_N", "rec_P", "rec_K", "rel_N", "rel_P", "rel_K", "WLY", "CurrentY")]
colnames(wlydataUsedSeason1) <- c("soilN","soilP","soilK", "rec_N", "rec_P", "rec_K", "rel_N", "rel_P", "rel_K", "water_limited_yield", "Current_Yield")


PotatoRecom_2021 <- NULL
for(j in 1:nrow(WLYCY)){
  PotatoRecom_2021 <- rbind(PotatoRecom_2021,  getoptimzedRecom(fertilizers_Rw = fertilizers_Rw, soilwlycy=WLYCY[j, ], rootUP=rootUP, InvestHa=213000))
}


PotatoRecom_adj_21 <- NULL
for(j in 1:nrow(WLYCY)){
  print(j)
  PotatoRecom_adj_21 <- rbind(PotatoRecom_adj_21, check25_Potato(soilwlycy=WLYCY[j, ], PotatoRecomL = PotatoRecom_2021[j, ], fertilizers_Rw=fertilizers_Rw) )
}


head(PotatoRecom_adj_21)

PotatoRecom_adj_21$SS <- paste(round(PotatoRecom_adj_21$soilN, digits=0),round(PotatoRecom_adj_21$soilP, digits=0), round(PotatoRecom_adj_21$soilK, digits=0), sep="_")

PotatoRecom_X21 <- merge(PotatoRecom_adj_21, soilNPK_tt[, c("soilNPKQ", "SS")], by="SS")
PotatoRecom_X21 <- subset(PotatoRecom_X21, select=-c(SS))
head(PotatoRecom_X21)
names(PotatoRecom_X21)[13] <- "N15CaO25B"
PotatoRecom_X21[PotatoRecom_X21$N15CaO25B == 0 & PotatoRecom_X21$NPK15_9_20 == 0 & PotatoRecom_X21$NPK17_17_17 == 0 & PotatoRecom_X21$NPK23_10_5 == 0 &
                PotatoRecom_X21$DAP == 0 & PotatoRecom_X21$urea == 0, ]
PotatoRecom_X21[PotatoRecom_X21$N15CaO25B == 0 , ]
PotatoRecom_X21[PotatoRecom_X21$NPK15_9_20 == 0 , ]
PotatoRecom_X21[PotatoRecom_X21$NPK17_17_17 == 0 , ]
PotatoRecom_X21[PotatoRecom_X21$NPK23_10_5 == 0 , ]
PotatoRecom_X21[PotatoRecom_X21$DAP == 0, ]
PotatoRecom_X21[PotatoRecom_X21$urea == 0, ]
PotatoRecom_X21[PotatoRecom_X21$soilN >100, ]

write.csv(PotatoRecom_X21, "PotatoRecom_2021.csv", row.names = FALSE)







PotatoRecom_X <- read.csv("PotatoRecom.csv") ### Season 1 advice with 300 USD
PotatoRecom_2021 <- read.csv("PotatoRecom_2021.csv") ### Season 2 advuce with 213 USD

summary(PotatoRecom_X$soilN)
summary(PotatoRecom_2021$soilN)

hist(PotatoRecom_X$N)
hist(PotatoRecom_X$P)
hist(PotatoRecom_X$K)

PotatoRecom_X[order(PotatoRecom_X$N), ]

quantile(PotatoRecom_X$N, probs=seq(0, 1, 0.1))
quantile(PotatoRecom_2021$N, probs=seq(0, 1, 0.1))

PotatoRecom_X[PotatoRecom_X$N <= 40, ]


PotatoRecom_X[order(PotatoRecom_X$soilN, PotatoRecom_X$soilK), ]
XX <- PotatoRecom_X[, c("urea", "NPK17_17_17","N15CaO25B", "N15CaO25B", "NPK15_9_20","DAP")]
XX[order(XX$urea, XX$DAP ), ]

zerofert <- PotatoRecom_X[PotatoRecom_X$N15CaO25B == 0 & PotatoRecom_X$NPK15_9_20 == 0 & PotatoRecom_X$NPK17_17_17 == 0 & PotatoRecom_X$N15CaO25B == 0 &
                            PotatoRecom_X$DAP == 0 & PotatoRecom_X$urea == 0, ]

PotatoRecom_X$soil_N <- ifelse(PotatoRecom_X$soilN > 100, "High_N", ifelse(PotatoRecom_X$soilN < 50, "Low_N", "Med_N"))
PotatoRecom_X$soil_P <- ifelse(PotatoRecom_X$soilP > 70, "High_P", ifelse(PotatoRecom_X$soilP < 20, "Low_P", "Med_P"))
PotatoRecom_X$soil_K <- ifelse(PotatoRecom_X$soilK > 100, "High_K", ifelse(PotatoRecom_X$soilK < 50, "Low_K", "Med_K"))

PotatoRecom_X[PotatoRecom_X$soil_N == "High_N" & PotatoRecom_X$soil_P=="High_P", ]


hist(PotatoRecom_X$NR)
quantile(PotatoRecom_X$NR, probs=seq(0 ,1, 0.1))

ggplot(PotatoRecom_X, aes(TC, NR, col=soilNPKQ))+
  geom_point()+
  facet_wrap(~soilNPKQ)


PotatoRecom_X$soil_N <- factor(PotatoRecom_X$soil_N, levels = c("High_N", "Med_N", "Low_N"))
PotatoRecom_X$soil_P <- factor(PotatoRecom_X$soil_P, levels = c("High_P", "Med_P", "Low_P"))
PotatoRecom_X$soil_K <- factor(PotatoRecom_X$soil_K, levels = c("High_K", "Med_K", "Low_K"))
PotatoRecom_X$soil_K <- as.factor(PotatoRecom_X$soil_K)

gg <- ggplot(PotatoRecom_X, aes(TC, NR, col=soil_K))+
  geom_point(size=3, position = position_jitter(w = 7000))+
  facet_grid(soil_P~soil_N)+
  theme_bw()+
  theme(strip.text = element_text(size=14), axis.text =  element_text(size=10)
        , axis.title =  element_text(size=14))
ggsave("RW_fert.png", gg, width=10, height = 8)


PotatoRecom_X[PotatoRecom_X$soil_N == "High_N" & PotatoRecom_X$soil_P=="High_P", ]
PotatoRecom_X[PotatoRecom_X$soil_N == "High_N" & PotatoRecom_X$soil_P=="Low_P", ]


############ creating packages and test ############
## one bag  = 50 kg fertilizer

### packages:
Ppacks <- rbind(
  data.frame(type="NPK171717_6bags",N = round(300*0.17, digits=0), P= round(300*0.075, digits=0), K = round(300 * 0.14, digits=0), price=(300*615)),## blanket recom
  data.frame(type="NPK171717_3bags", N = round(150*0.17, digits=0), P= round(150*0.075, digits=0), K = round(150 * 0.14, digits=0), price =150*615 ),## half balnket recom
  data.frame(type="NPK171717_9bags",N = round(450*0.17, digits=0), P= round(450*0.075, digits=0), K = round(450 * 0.14, digits=0), price=(450*615)),## 1.5* blanket recom
  data.frame(type="NPK171717_9bags_4bagsNPK15920",N = round(450*0.17, digits=0)+30, P= round(450*0.075, digits=0)+8, K = round(450 * 0.14, digits=0)+33, price=((450*615) + (200*615))),## 2* blanket recom
  data.frame(type="NPK171717_6bags_urea2bags",N = round(300*0.17, digits=0)+46, P= round(300*0.075, digits=0), K = round(300 * 0.14, digits=0), price = ((300*615) + 100*462)),## 2*N of blanket recom
  data.frame(type="NPK23105_6bags",N = round(300*0.23, digits=0), P= round(300*0.044, digits=0), K = round(300 * 0.042, digits=0), price= (511*300)),## INcrease N and reduce P and K
  data.frame(type="NPK15920_6bags",N = round(300*0.15, digits=0), P= round(300*0.04, digits=0), K = round(300 * 0.166, digits=0), price=(300*615)),## Increased K and reduce N&K
  data.frame(type="NPK15920_9bags",N = round(450*0.15, digits=0), P= round(450*0.04, digits=0), K = round(450 * 0.166, digits=0), price=(450*615)),## Increased N & K and reduce P
  data.frame(type="NPK15920_6bags_urea2bags",N = (round(300*0.15, digits=0) + 46), P= round(300*0.04, digits=0), K = round(300 * 0.166, digits=0), price=(300*615)+(100*462)),## Increased N & K and reduce P
  data.frame(type="NPK15920_6bags_DAP2bags",N = round(300*0.15, digits=0) +  18, P= round(300*0.04, digits=0)+20, K = round(300 * 0.166, digits=0), price=(615*300)+(100*511)),## slight increase of NPK
  data.frame(type="NPK23105_9bags",N = round(450*0.23, digits=0), P= round(450*0.044, digits=0), K = round(450 * 0.042, digits=0), price=(450*511)),## 2*N, increase K slight decrease P
  data.frame(type="NPK15920_5bags_DAP2bags",N = round(250*0.15, digits=0) +  18, P= round(250*0.04, digits=0)+20, K = round(250 * 0.166, digits=0), price=(250*615)+(100*511)),## almost blanket
  data.frame(type="NPK15920_4ags_urea2bags",N = (round(200*0.15, digits=0) + 46), P= round(200*0.04, digits=0), K = round(200 * 0.166, digits=0), price=(200*615)+(100*511)),## increae N&K reduce P
  data.frame(type="NPK15920_4bags_DAP4bags",N = round(200*0.15, digits=0) +  36, P= round(200*0.04, digits=0)+40, K = round(200 * 0.166, digits=0), price=(200*615)+(200*511)),##  increase N&P recue K,
  data.frame(type="NPK171717_3bags_MOP1bag",N = 26, P= 11, K = 46, price=117800),
  data.frame(type="NPK171717_6bags_MOP1bags",N = 51, P= 22, K = 67, price=210050),
  data.frame(type="NPK171717_6bags_MOP2bags",N = 51, P= 22, K = 92, price=235600),
  data.frame(type="NPK171717_9bags_MOP2bags",N = 76, P= 34, K = 113, price=327850),
  data.frame(type="NPK171717_6bags_MOP2bags_Urea2bags",N = round((300*0.17)+(46), digits=0), P= round((300*0.075), digits=0), K = round(300 * 0.14+(50), digits=0), price=(999)),## blanket recom
  data.frame(type="NPK171717_6bags_DAP2bags_Urea2bags",N = round((300*0.17)+(46) + (18), digits=0), P= round((300*0.075)+20, digits=0), K = round(300 * 0.14, digits=0), price=(999)),## blanket recom
  data.frame(type="NPK15920_6bags_DAP4bags",N = round(300*0.15, digits=0) +  36, P= round(300*0.04, digits=0)+40, K = round(200 * 0.166, digits=0), price=(300*615)+(200*511)),##  increase N&P recue K,
  data.frame(type="NPK23105_6bags_4bagsMOP",N = round(300*0.23, digits=0), P= round(300*0.044, digits=0), K = round(300 * 0.042+(200*0.5), digits=0), price= (511*300)),## INcrease N and reduce P and K
  data.frame(type="NPK171717_6bags_DAP2bags_Urea2bags_2bagsMOP",N = round((300*0.17)+(46) + (18), digits=0), P= round((300*0.075)+20, digits=0), K = round(300 * 0.14+(50), digits=0), price=(999))## blanket recom
  
)



write.csv(Ppacks, "Tested_Packages.csv", row.names=FALSE)


testPackages <- function (packs, soilwlycy, rootUP){
  DCY <- soilwlycy$Current_Yield
  CY_user <- DCY/0.21
  WLY_user <- soilwlycy$water_limited_yield /0.21
  rec <- c(packs$N, packs$P, packs$K)
  ## TY for ha of land 	
  TY <- QUEFTS1_Pedotransfer_potato(QID=soilwlycy, rec)	# Yield possible at recommended NPK in kg/ha dry wt. 
  ## both CY and TY should be changed to user land size in ton/ha and fresh wt
  TY_user <-  TY/0.21
  ## total cost per ha
  TC <- packs$price
  ## net revenue on the users land size
  GR <- (TY_user - CY_user) * rootUP                     
  NR <- round(GR - TC, digits=0)  											
  packs$WLY = WLY_user
  packs$TY = TY_user
  packs$CY = CY_user
  packs$NR = NR
  packs$TC = TC
  if (packs$NR < packs$TC * 0.2) {
    packs$NR = packs$TC = 0
  }
  packs$soilN <- soilwlycy$soilN
  packs$soilP <- soilwlycy$soilP
  packs$soilK <- soilwlycy$soilK
  return(packs)
}



PotatoPackages <- NULL
for (i in 1: nrow(Ppacks)){
  pp <- NULL
  for(j in 1:nrow(WLYCY)){
    pp <- rbind(pp,  testPackages(packs = Ppacks[i,], soilwlycy=WLYCY[j, ], rootUP=rootUP))
  }
  PotatoPackages <- rbind(PotatoPackages, pp)
}


head(PotatoPackages)


PotatoPackages$soil_N <- ifelse(PotatoPackages$soilN > 100, "High_N", ifelse(PotatoPackages$soilN < 50, "Low_N", "Med_N"))
PotatoPackages$soil_P <- ifelse(PotatoPackages$soilP > 70, "High_P", ifelse(PotatoPackages$soilP < 20, "Low_P", "Med_P"))
PotatoPackages$soil_K <- ifelse(PotatoPackages$soilK > 100, "High_K", ifelse(PotatoPackages$soilK < 50, "Low_K", "Med_K"))
PotatoPackages$soilNPK <- paste(PotatoPackages$soil_N, PotatoPackages$soil_P, PotatoPackages$soil_K, sep="_")

## Per the 27 soil NPK check which package give higher NR
dd <- PotatoPackages[PotatoPackages$soilNPK == "Low_N_Low_P_Low_K", ]
dd[order(dd$TY), ]
soilregime <- unique(PotatoPackages$soilNPK)
soilregime <- soilregime[soilregime!= "High_N_High_P_High_K"]

ggplot(PotatoPackages, aes(TY, NR, col=type))+
  geom_point()+
  facet_wrap(~soilNPK)

write.csv(PotatoPackages, "PotatoPackages.csv", row.names = FALSE)


bestpack <- NULL
for(k in soilregime){
  snpk_d <- droplevels(PotatoPackages[PotatoPackages$soilNPK == k, ])
  bestpack <- rbind(bestpack, snpk_d[snpk_d$NR == max(snpk_d$NR), ])
}
bestpack$type <- droplevels(bestpack$type)

packfreq <- as.data.frame(table(bestpack$type, bestpack$soilNPK))
packfreq <- packfreq[order(packfreq$Freq), ]


packfreq[packfreq$Var1 == "NPK23105_6bags" & packfreq$Freq>0, ]
packfreq[packfreq$Var1 == "NPK15920_6bags_urea2bags" & packfreq$Freq>0, ]
packfreq[packfreq$Var1 == "NPK171717_6bags_MOP2bags" & packfreq$Freq>0, ]
packfreq[packfreq$Var1 == "NPK171717_9bags_MOP2bags" & packfreq$Freq>0, ]



bestpack[bestpack$type == "NPK171717_9bags_4bagsNPK15920", ] ## low/med N  -  low K   
bestpack[bestpack$type == "NPK23105_6bags", ] ## low/med N  -  high K  
bestpack[bestpack$type == "NPK15920_6bags_urea2bags", ] ## low/med N  -  high K  
PotatoPackages <- PotatoPackages[PotatoPackages$NR > 0, ]

PotatoPackages$NRTC <- round(PotatoPackages$NR / PotatoPackages$TC, digits=0)
summary(PotatoPackages$NRTC)

## NPK171717_9bags_4bagsNPK15920 is very expensive fertiizer that will not be adviced any where
PotatoPackages <- droplevels(PotatoPackages[PotatoPackages$type != "NPK171717_9bags_4bagsNPK15920", ])

c("High_N_High_P_High_K", "High_N_Med_P_High_K")## no need of applying fertilizers



PotatoPackages$NR_USD <- PotatoPackages$NR/962
PotatoPackages$TC_USD <- PotatoPackages$TC/962

ggplot(PotatoPackages, aes(NRTC, NR_USD, col=type, shape = type))+
  geom_point(size=2)+
  facet_wrap(~soilNPK, scales = 'free_x') +
  scale_shape_manual(values = c(0,1,2,3,4,5,7,8,10,11,13,15,16,17,18,23,25 )) + 
  xlab("NR:TC ratio")+ ylab("NR in USD")+
  ggtitle(unique(snpk_d$soilNPK))+
  theme_bw()+
  theme()


Pac_NPK23105_6bags <- c("High_N_Med_P_Med_K", "Low_N_Low_P_Low_K", "Med_N_High_P_Med_K", "Med_N_Med_P_Med_K",
                        "Low_N_High_P_High_K", "Low_N_Med_P_High_K", "Med_N_Low_p_High_K","Low_N_High_P_Med_K",
                        "Low_N_Med_P_Med_K", "Med_N_Low_P_Med_K", "Low_N_low_P_High_K")

Pac_NPK171717_3bags <- c("High_N_High_P_Low_K","High_N_Med_P_Low_K","High_N_Med_P_Med_K", "High_N_Low_P_High_K",
                         "High_N_Low_P_Med_K")

Pac_NK15920_4bags_urea2bags <- c("Low_N_High_P_Low_K", "Low_N_Med_P_Low_K")


highN <- PotatoPackages[PotatoPackages$soil_N == "High_N", ]
medN <- PotatoPackages[PotatoPackages$soil_N == "Med_N", ]
lowN <- PotatoPackages[PotatoPackages$soil_N == "Low_N", ]

highK <- PotatoPackages[PotatoPackages$soil_K == "High_K", ]
medK <- PotatoPackages[PotatoPackages$soil_K == "Med_K", ]
lowK <- PotatoPackages[PotatoPackages$soil_K == "Low_K", ]

HN <- ggplot(highK, aes(NRTC, NR_USD, col=type, shape = type))+
  geom_point(size=3)+
  facet_grid(soilNPK~.) +
  scale_shape_manual(values = c(0,1,2,3,4,5,7,8,10,11,13,15,16,17,18,23,25)) + 
  xlab("NR:TC ratio")+ ylab("NR in USD")+
  ggtitle("High N")+
  theme_bw()+
  theme(strip.text.y = element_text(angle=0),plot.title = element_text(hjust = 0.5), axis.text.x = element_text(size=14)
        , axis.title = element_text(size=14))

MN <- ggplot(medK, aes(NRTC, NR_USD, col=type, shape = type))+
  geom_point(size=3)+
  facet_grid(soilNPK~.) +
  scale_shape_manual(values = c(0,1,2,3,4,5,7,8,10,11,13,15,16,17,18,23,25)) + 
  xlab("NR:TC ratio")+ ylab("NR in USD")+
  ggtitle("Med N")+
  theme_bw()+
  theme(strip.text.y = element_text(angle=0),plot.title = element_text(hjust = 0.5), axis.text.x = element_text(size=14)
        , axis.title = element_text(size=14))


LN<- ggplot(lowK, aes(NRTC, NR_USD, col=type, shape = type))+
  geom_point(size=3)+
  facet_grid(soilNPK~.) +
  scale_shape_manual(values = c(0,1,2,3,4,5,7,8,10,11,13,15,16,17,18,23,25)) + 
  xlab("NR:TC ratio")+ ylab("NR in USD")+
  ggtitle("Low N")+
  theme_bw()+
  theme(strip.text.y = element_text(angle=0),plot.title = element_text(hjust = 0.5), axis.text.x = element_text(size=14)
        , axis.title = element_text(size=14))


ggsave("High_N_Package.png", HN, width=10, height = 8)
ggsave("Med_N_Package.png", MN, width=10, height = 8)
ggsave("Low_N_Package.png", LN, width=10, height = 8)




























######################################################################################################################3
## acai NOT data ANALYSIS 
######################################################################################################################3

unique(ds_NOT[ds_NOT$fieldID == "", ]$trialID)


TZ_LakeSouth_20172018 <- droplevels(ds_NOT[ds_NOT$country == "Tanzania" & ds_NOT$harvestYear %in% c(2017,2018) & 
                                             ds_NOT$Region %in% c("TZ_lake","TZ_south"), ])
str(TZ_LakeSouth_20172018)

## there are plots without fieldID, either due to plot id replacments or the trial not being linked to field ID at assign
## if it is due to replacment, the original plot ids have field id so assign that one for all other plots within the same trial
## if it is due to not being linked at assign, use the trialId as field ID for modelling. 

dsfieldOk <- unique(droplevels(ds_NOT[!ds_NOT$trialID %in% unique(ds_NOT[ds_NOT$fieldID == "", ]$trialID), ]))
dsfieldNot <- droplevels(ds_NOT[ds_NOT$trialID %in% unique(ds_NOT[ds_NOT$fieldID == "", ]$trialID), ])


FF <- NULL
for (k in unique(dsfieldNot$trialID)){
  td <- droplevels(dsfieldNot[dsfieldNot$trialID == k, ])
  fids <- unique(td$fieldID)
  if(length(fids[grep("FD", fids)]) == 0){
    td$fieldID <- td$trialID[1]
  }else if(length(fids[grep("FD", fids)]) == 1){
    td$fieldID <- fids[grep("FD", fids)]
  }else if (length(fids[grep("FD", fids)]) == 2){
    nrow1 <- td[td$fieldID == fids[grep("FD", fids)][1], ]
    nrow2 <- td[td$fieldID == fids[grep("FD", fids)][2], ]
    if(nrow(nrow1) > nrow(nrow2)){
      td$fieldID <- fids[grep("FD", fids)][1]
    }else{
      td$fieldID <- fids[grep("FD", fids)][2]
    }
  }
  FF <- rbind(FF, td)
}

FF <- unique(FF)

ds_NOT <- rbind(dsfieldOk, FF)
ds_NOT <- unique(ds_NOT)
str(ds_NOT)

ds_NOT[ds_NOT$plotID == "ACPONG034643", ]
ds_NOT[ds_NOT$trialID == "ACTLNG005415", ]


ggplot(ds_NOT, aes(treat, yield, fill=treat))+
  geom_boxplot()+
  facet_grid(harvestYear~Region, scales="free")+
  ylab("Root yield (t/ha)")+
  theme_bw()+
  theme(axis.title.y = element_text(size=16, face="bold"),
        axis.title.x = element_blank(),
        axis.text = element_text(size=14),
        axis.text.x = element_text(angle=-90, hjust=0, vjust=.5),
        strip.text = element_text(size=16, face="bold"))



TZ_LakeSouth_20172018_B <- droplevels(ds_NOT[ds_NOT$country == "Tanzania" & ds_NOT$harvestYear %in% c(2017,2018) & 
                                               ds_NOT$Region %in% c("TZ_lake","TZ_south"), ])

write.csv(TZ_LakeSouth_20172018_B, "TZ_LakeSouth_20172018.csv", row.names = FALSE)
## TZ_east is a very poor data, we exclude these from getting the BLUPS
ds_NOT <- droplevels(ds_NOT[!ds_NOT$Region  == "TZ_east", ])

## model to get BLUP
mod1 <- lmer(data=ds_NOT, yield ~ 0 + treat + country:harvestYear + (0+treat|fieldID))
mod2 <- lmer(data=ds_NOT, yield ~ 0 + treat:country:harvestYear + (0+treat|fieldID))
mod2 <- lmer(data=ds_NOT, yield ~ 0 + treat * (country:harvestYear) + (0+treat|fieldID))


anova(mod1, mod2)
anova(mod2, mod2)


ds_NOT$Treatment <- ds_NOT$treat

gg2 <- ggplot(ds_NOT, aes(harvestYear, yield, fill=Treatment)) +
  geom_boxplot() +
  facet_wrap(~country) +
  ylab("Root yield (t/ha)") +
  theme_bw() +
  theme(axis.title.y = element_text(size=14, face="bold"),
        axis.title.x = element_blank(),
        axis.text = element_text(size=12),
        axis.text.x = element_text(hjust=0.5, vjust=.5),
        strip.text = element_text(size=14, face="bold"),
        legend.text =element_text(size=14), legend.title = element_blank())

ggsave("DataExp2.png", gg2, width=8, height = 4)

dsTZ <- droplevels(ds_NOT[ds_NOT$country == "Tanzania", ])
dsNG <- droplevels(ds_NOT[ds_NOT$country == "Nigeria", ])
dsTZ <- droplevels(dsTZ[dsTZ$yield <= 75, ])

ds_NOT <- rbind(dsTZ, dsNG)



mod2 <- lmer(data=ds_NOT, yield ~ treat:country:harvestYear + (0+treat|fieldID))
summary(mod2)
ref.grid(mod2)
ref.grid(mod2, emm_options(pbkrtest.limit = 4648))
lsm3 <- lsmeans(mod2, c("treat", "country", "harvestYear"))
lsmDF3 <- as.data.frame(lsm3)
lsmDF3 <- lsmDF3[,1:4]
names(lsmDF3)[1] <- "treatment"
names(lsmDF3)[4] <- "lsmean"
row.names(lsmDF3) <- NULL


ran3 <- ranef(mod2)$fieldID
ran3$fieldID <- row.names(ran3)
names(ran3) <- gsub("treat", "", names(ran3))
ran3 <- melt(ran3, 
             measure.vars=c("NPK", "PK", "NK", "NP", "half_NPK", "CON"),
             variable.name="treatment",
             id.vars="fieldID",
             value.name="blup")

ran3 <- merge(ran3, unique(ds_NOT[, c("fieldID", "harvestYear", "country")]), by="fieldID")


head(ran3)
ggplot(ran3[ran3$country == "Nigeria", ], aes(fieldID, blup, col=treatment))+
  geom_point()+
  geom_abline(slope=0, intercept = 0)+
  facet_grid(treatment ~ country)+
  theme_bw()+
  theme(axis.text.x = element_blank())




ran3 <- merge(ran3, lsmDF3, by=c("treatment", "harvestYear", "country"))
ran3$blup <- ran3$blup + ran3$lsmean
ran3 <- subset(ran3, select=-c(lsmean))
head(ran3)


ran_NG_2017 <- ran3[ran3$harvestYear == 2017 & ran3$country == "Nigeria", ]
ran_NG_2018 <- ran3[ran3$harvestYear == 2018 & ran3$country == "Nigeria", ]
ran_NG_2019 <- ran3[ran3$harvestYear == 2019 & ran3$country == "Nigeria", ]
ran_TZ_2017 <- ran3[ran3$harvestYear == 2017 & ran3$country == "Tanzania", ]
ran_TZ_2018 <- ran3[ran3$harvestYear == 2018 & ran3$country == "Tanzania", ]
ran_TZ_2019 <- ran3[ran3$harvestYear == 2019 & ran3$country == "Tanzania", ]
ranCY <- rbind(ran_NG_2017, ran_NG_2018, ran_NG_2019, ran_TZ_2017, ran_TZ_2018, ran_TZ_2019)
head(ranCY)
ddply(ranCY, .(country, treatment), summarize, avyeartreat = mean(blup))


ran.w <- spread(ranCY, treatment, blup)
head(ran.w)


saveRDS(ran.w, "NOT_Blups_2019.RDS")
longData <- gather(ran.w, treat, value, PK:CON)
head(longData)


longData <- longData[!longData$treat == "half_NPK", ]
longData$YieldDiff <- longData$NPK - longData$value
longData$NutrientMissing <- ifelse(longData$treat == "NP", "K", ifelse(longData$treat == "NK", "P", 
                                                                       ifelse(longData$treat == "PK", "N", "NPK")))

longData$TreatDiff <- ifelse(longData$treat == "NP", "NPK - NP (K Eff.)", 
                             ifelse(longData$treat == "NK", "NPK - NK (P Eff.)", 
                                    ifelse(longData$treat == "PK", "NPK - PK (N Eff.)", "NPK - CON (NPK Eff.)")))
longData <- droplevels(longData[longData$YieldDiff <= 30, ])

longData$NutrientMissing <- factor(longData$NutrientMissing, levels = c("N", "P", "K", "NPK"))
longData$TreatDiff <- factor(longData$TreatDiff, levels = c("NPK - CON (NPK Eff.)", "NPK - PK (N Eff.)", 
                                                            "NPK - NP (K Eff.)","NPK - NK (P Eff.)"))

gg3 <- ggplot(data=longData, aes(value, y=YieldDiff, col = country)) +
  facet_grid(harvestYear~TreatDiff) +
  stat_density2d() +
  geom_point(size=1.2) +
  ylim(-5,20)+
  geom_abline(intercept=0,slope=0, col="darkgrey") +
  scale_color_manual(values=c("black", "orange")) +
  xlab("Root yield [t/ha] (facets displaying blups for CON, PK, NP and NK treatments)") +
  ylab("Yield response to nutrient applied [t/ha]") +
  theme_bw() +
  theme(axis.text  = element_text(size=13),
        axis.title = element_text(size=15), strip.text = element_text(size=14), 
        legend.text =element_text(size=15), legend.title = element_blank())



ggsave("NOT_BLUPS_Dec2019.png", gg3, width=12, height = 6)

ddply(longData, .(country, TreatDiff), summarize, tt = mean(YieldDiff))
NG_Neff <- droplevels(longData[longData$country == "Nigeria" & longData$TreatDiff == "NPK - NK (P Eff.)", ])
summary(NG_Neff$YieldDiff)



#######################################################################################
## there are few field ids with multiple gps reading but all but one are very close gps readings: take one of the readings
## this will be used after getting the blups to get ISRIC soil data for field trial locations
#######################################################################################

setwd("/home/akilimo/lintul/dataSources/NOT")
NOT_blups <- readRDS("NOT_Blups_2019.RDS")


ds_NOT$location <- paste(ds_NOT$lat, ds_NOT$lon, sep="_")
Q <- unique(ds_NOT[, c("fieldID", "location")])
tt <- as.data.frame(table(Q$fieldID))
rr <- as.data.frame(table(Q$location))
oneloc <- unique(droplevels(ds_NOT[ds_NOT$fieldID %in% droplevels(tt[tt$Freq > 1, ]$Var1), 
                                   c("lat", "lon", "Admin1","Admin2","Region","location", "fieldID", "harvestMonth", "harvestDate")]))
Multloc <- unique(droplevels(ds_NOT[!ds_NOT$fieldID %in% droplevels(tt[tt$Freq > 1, ]$Var1), 
                                    c("lat", "lon", "Admin1","Admin2","Region","location", "fieldID", "harvestMonth", "harvestDate")]))

onleloc_corr <- NULL
for( fd in unique(oneloc$fieldID)){
  onleloc_corr <- rbind(onleloc_corr, oneloc[oneloc$fieldID == fd, ][1,])
}

NOT_gps <- rbind(onleloc_corr, Multloc)
length(unique(NOT_gps$fieldID))

NOT_blups <- merge(NOT_blups, NOT_gps, by="fieldID")
head(NOT_blups)
NOT_blups$Hdate <- paste(NOT_blups$harvestDate,"/", NOT_blups$harvestMonth, "/", NOT_blups$harvestYear, sep="")
head(NOT_blups)
NOT_blups$Hdate <- as.Date(NOT_blups$Hdate, format = '%d/%b/%Y')
NOT_blups$Hnr <- lubridate::yday(NOT_blups$Hdate)
saveRDS(NOT_blups, "NOT_blups_GPS.RDS")

#######################################################################################
## use QUEFTS to get soil NPK; change fresh matter to drymatter and then run QUEFTS
#######################################################################################
GIS_BLUP_NOT <- readRDS("NOT_blups_GPS.RDS")

GIS_BLUP_NOT_NG <- droplevels(GIS_BLUP_NOT[grep("NG", GIS_BLUP_NOT$fieldID), ])
GIS_BLUP_NOT_TZ <- droplevels(GIS_BLUP_NOT[grep("TZ", GIS_BLUP_NOT$fieldID), ])

length(unique(GIS_BLUP_NOT_NG$fieldID))##231
length(unique(GIS_BLUP_NOT_TZ$fieldID))##243

### change fresh matter to dry matter

getRDY <- function(HD, RFY, country){
  #SHORT DEF:   Function to convert root FM yield into root dry matter yield (RDY): user define CY in FM in ton/ha, QUEFTS require Cy in DM kg/ha
  #RETURNS:     RDY: root dry yield in the same units as root FM yield input
  #INPUT:       HD: harvest date (Date format)
  #             RFY: root fresh matter yield (user's units)
  #             country = c("NG", "TZ")
  d <- HD
  fd <- read.csv("/home/akilimo/lintul/dataSources/fd2.csv")
  DC <- merge(data.frame(dayNr=d), fd[fd$country==country,], sort=FALSE)$DMCont
  RDY <- (RFY * DC)/100
  return(RDY)
}

GIS_BLUP_NOT_NG$DM_NPK <- NULL
GIS_BLUP_NOT_NG$DM_PK <- NULL
GIS_BLUP_NOT_NG$DM_NK <- NULL
GIS_BLUP_NOT_NG$DM_NP <- NULL
GIS_BLUP_NOT_NG$DM_half_NPK <- NULL
GIS_BLUP_NOT_NG$DM_CON <- NULL
for(k in 1:nrow(GIS_BLUP_NOT_NG)){
  GIS_BLUP_NOT_NG$DM_NPK[k] <- getRDY(HD=GIS_BLUP_NOT_NG$Hnr[k], RFY = GIS_BLUP_NOT_NG$NPK[k], country = "NG")*1000
  GIS_BLUP_NOT_NG$DM_PK[k] <- getRDY(HD=GIS_BLUP_NOT_NG$Hnr[k], RFY = GIS_BLUP_NOT_NG$PK[k], country = "NG")*1000
  GIS_BLUP_NOT_NG$DM_NK[k] <- getRDY(HD=GIS_BLUP_NOT_NG$Hnr[k], RFY = GIS_BLUP_NOT_NG$NK[k], country = "NG")*1000
  GIS_BLUP_NOT_NG$DM_NP[k] <- getRDY(HD=GIS_BLUP_NOT_NG$Hnr[k], RFY = GIS_BLUP_NOT$NP[k], country = "NG")*1000
  GIS_BLUP_NOT_NG$DM_half_NPK[k] <- getRDY(HD=GIS_BLUP_NOT_NG$Hnr[k], RFY = GIS_BLUP_NOT_NG$half_NPK[k], country = "NG")*1000
  GIS_BLUP_NOT_NG$DM_CON[k] <- getRDY(HD=GIS_BLUP_NOT_NG$Hnr[k], RFY = GIS_BLUP_NOT_NG$CON[k], country = "NG")*1000
}

head(GIS_BLUP_NOT_NG)

GIS_BLUP_NOT_TZ$DM_NPK <- NULL
GIS_BLUP_NOT_TZ$DM_PK <- NULL
GIS_BLUP_NOT_TZ$DM_NK <- NULL
GIS_BLUP_NOT_TZ$DM_NP <- NULL
GIS_BLUP_NOT_TZ$DM_half_NPK <- NULL
GIS_BLUP_NOT_TZ$DM_CON <- NULL
for(k in 1:nrow(GIS_BLUP_NOT_TZ)){
  GIS_BLUP_NOT_TZ$DM_NPK[k] <- getRDY(HD=GIS_BLUP_NOT_TZ$Hnr[k], RFY = GIS_BLUP_NOT_TZ$NPK[k], country = "NG")*1000
  GIS_BLUP_NOT_TZ$DM_PK[k] <- getRDY(HD=GIS_BLUP_NOT_TZ$Hnr[k], RFY = GIS_BLUP_NOT_TZ$PK[k], country = "NG")*1000
  GIS_BLUP_NOT_TZ$DM_NK[k] <- getRDY(HD=GIS_BLUP_NOT_TZ$Hnr[k], RFY = GIS_BLUP_NOT_TZ$NK[k], country = "NG")*1000
  GIS_BLUP_NOT_TZ$DM_NP[k] <- getRDY(HD=GIS_BLUP_NOT_TZ$Hnr[k], RFY = GIS_BLUP_NOT_TZ$NP[k], country = "NG")*1000
  GIS_BLUP_NOT_TZ$DM_half_NPK[k] <- getRDY(HD=GIS_BLUP_NOT_TZ$Hnr[k], RFY = GIS_BLUP_NOT_TZ$half_NPK[k], country = "NG")*1000
  GIS_BLUP_NOT_TZ$DM_CON[k] <- getRDY(HD=GIS_BLUP_NOT_TZ$Hnr[k], RFY = GIS_BLUP_NOT_TZ$CON[k], country = "NG")*1000
}

head(GIS_BLUP_NOT_TZ)
setwd("/home/akilimo/lintul/dataSources/NOT")
GIS_BLUP_NOT <- rbind(GIS_BLUP_NOT_TZ, GIS_BLUP_NOT_NG)
saveRDS(GIS_BLUP_NOT, "NOT_blups_GPS_DM.RDS")


require(lpSolve)
nutrient_uptake <- function(S1=NA, S2=NA, d1=NA, a1=NA, d2=NA, a2=NA, r1=NA, r2=NA) {	
  # N, P and K uptakes based on QUEFTS
  if (S1 < r1 + ((S2 - r2) * a2 / d1)) {
    uptakeX_givenY = S1
  } else if (S1 > r1 + ((S2 - r2) * (2 * d2 / a1 - a2 / d1))) {
    uptakeX_givenY = r1 + (S2 - r2) * (d2 / a1)
  } else {
    uptakeX_givenY = S1 - 0.25 * (S1 - r1 - (S2 - r2) * (a2 / d1))^2 / ((S2 - r2) * (d2 / a1 - a2 / d1))
  }
  # Nutrient uptake given availability of other nutrient
  return(uptakeX_givenY)
}

yield_nutrients_combined <- function(U1=NA, d1=NA, a1=NA, Y2A=NA, Y2D=NA, Y3D=NA, r1=NA){	
  # Determine which nutrient limited to force yield being the lowest.
  YxD = min(Y2D, Y3D)	
  # If the uptake of one of the nutrients, and therefore the yield associated with that 
  # nutrient, is zero the overall yield is also zero.
  if (U1 == 0 || YxD == 0) {
    Y12 = 0
  }else{
    Y12 = Y2A + (2 * (YxD - Y2A) * (U1 - r1 - Y2A / d1)) / (YxD / a1 - Y2A / d1) -
      (YxD - Y2A) * (U1 - r1 - Y2A / d1)^2 / (YxD / a1 - Y2A / d1)^2
  }
  # Return the calculated yield based on the uptake of nutrients 1 and 2
  return(Y12)
}

water_dependent_nutrient_uptake <- function(S1=NA, WLY=NA, d1=NA, a1=NA, r1=NA) {	
  if (S1 < r1 + WLY / d1) {
    uptakeX_givenWater = S1    
  } else if (S1 > r1 + 2*WLY/a1 - WLY/d1) {
    uptakeX_givenWater = WLY / a1    
  } else {
    uptakeX_givenWater = S1 - 0.25 * (S1 - r1 - WLY/d1)^2 / (WLY / a1 - WLY / d1)    
  }
  
  return(uptakeX_givenWater)
}


NUE <- function(HI, CmaxNroots=6.6, CminNroots=2.5, CmaxNtops=17.9, CminNtops=7.9, CmaxProots=1.5, CminProots=0.8, CmaxPtops=2.8, CminPtops=0.9, 
                CmaxKroots=11, CminKroots=2.8, CmaxKtops=18.8, CminKtops=3.4 ){
  aN = round(1000 * HI/(HI * CmaxNroots + (1 - HI) * CmaxNtops), digits=0)
  dN = round(1000 * HI/(HI * CminNroots + (1 - HI) * CminNtops), digits=0)
  
  aP = round(1000 * HI/(HI * CmaxProots + (1 - HI) * CmaxPtops), digits=0)
  dP = round(1000 * HI/(HI * CminProots + (1 - HI) * CminPtops), digits=0)
  
  aK = round(1000 * HI/(HI * CmaxKroots + (1 - HI) * CmaxKtops), digits=0)
  dK = round(1000 * HI/(HI * CminKroots + (1 - HI) * CminKtops), digits=0)
  
  return(data.frame(aN=aN, dN=dN,aP=aP,dP=dP,aK=aK,dK=dK))
  
}

NUE(HI=0.5)


## Maize: aN=29	; dN=74	; aP=95	; dP=476; aK=38	; dK=143	; rN=5	; rP=0.4	, rK=2
## Cassava: aN=41, dN=96, rN=0, aP=233, dP=588, rP=0, aK=34, dK=161, rK=0)
##Yield_S <- function(SN, SP, SK, WLY, aN=41, dN=96, rN=0, aP=233, dP=588, rP=0, aK=34, dK=161, rK=0){ ## cassava
Yield_S <- function(SN, SP, SK, WLY, aN=41, dN=96, rN=0, aP=233, dP=588, rP=0, aK=34, dK=161, rK=0){
  ## uptake of one nutrient conditioned by availablility of one other nutrient or water, rule of minimum
  UNP <- nutrient_uptake(S1 = SN, S2 = SP, d1 = dN, a1 = aN, d2 = dP, a2 = aP, r1 = rN, r2 = rP)
  UNK <- nutrient_uptake(S1 = SN, S2 = SK, d1 = dN, a1 = aN, d2 = dK, a2 = aK, r1 = rN, r2 = rK)
  UNW <- water_dependent_nutrient_uptake(S1 = SN, WLY = WLY, d1 = dN, a1 = aN, r1 = rN)
  UN <- min(UNP, UNK, UNW)
  
  UPN <- nutrient_uptake(S1 = SP, S2 = SN, d1 = dP, a1 = aP, d2 = dN, a2 = aN, r1 = rP, r2 = rN)
  UPK <- nutrient_uptake(S1 = SP, S2 = SK, d1 = dP, a1 = aP, d2 = dK, a2 = aK, r1 = rP, r2 = rK)
  UPW <- water_dependent_nutrient_uptake(S1 = SP, WLY = WLY, d1 = dP, a1 = aP, r1 = rP)
  UP <- min(UPN, UPK, UPW)				
  
  UKN <- nutrient_uptake(S1 = SK, S2 = SN, d1 = dK, a1 = aK, d2 = dN, a2 = aN, r1 = rK, r2 = rN)
  UKP <- nutrient_uptake(S1 = SK, S2 = SP, d1 = dK, a1 = aK, d2 = dP, a2 = aP, r1 = rK, r2 = rP)
  UKW <- water_dependent_nutrient_uptake(S1 = SK, WLY = WLY, d1 = dK, a1 = aK, r1 = rK)
  UK <- min(UKN, UKP, UKW)		
  
  ## yield base don uptake of Nitrogen at dilution or accumulation
  YNA <- max((UN - rN), 0) * aN
  YND <- max((UN - rN), 0) * dN
  YPA <- max((UP - rP), 0) * aP
  YPD <- max((UP - rP), 0) * dP
  YKA <- max((UK - rK), 0) * aK
  YKD <- max((UK - rK), 0) * dK	
  
  ## yield as defined by uptake of one nutrient conditioned to max and min yield based on secnd nutrient not to be constrined by a third nutrient
  YNP <- yield_nutrients_combined(U1 = UN, d1 = dN, a1 = aN, Y2A = YPA, Y2D = YPD, Y3D = YKD, r1 = rN)
  YNK <- yield_nutrients_combined(U1 = UN, d1 = dN, a1 = aN, Y2A = YKA, Y2D = YKD, Y3D = YPD, r1 = rN)
  YPN <- yield_nutrients_combined(U1 = UP, d1 = dP, a1 = aP, Y2A = YNA, Y2D = YND, Y3D = YKD, r1 = rP)
  YPK <- yield_nutrients_combined(U1 = UP, d1 = dP, a1 = aP, Y2A = YKA, Y2D = YKD, Y3D = YND, r1 = rP)
  YKN <- yield_nutrients_combined(U1 = UK, d1 = dK, a1 = aK, Y2A = YNA, Y2D = YND, Y3D = YPD, r1 = rK)
  YKP <- yield_nutrients_combined(U1 = UK, d1 = dK, a1 = aK, Y2A = YPA, Y2D = YPD, Y3D = YND, r1 = rK)
  
  # Make sure the nutrient limited yields do not exceed the maximum possible yield = WLY
  YNPc <- min(c(YNP, YND, YPD, YKD, WLY))
  YNKc <- min(c(YNK, YND, YPD, YKD, WLY))
  YPNc <- min(c(YPN, YND, YPD, YKD, WLY))
  YPKc <- min(c(YPK, YND, YPD, YKD, WLY))
  YKNc <- min(c(YKN, YND, YPD, YKD, WLY))
  YKPc <- min(c(YKP, YND, YPD, YKD, WLY))
  
  #Final estimate
  YEc <- mean(c(YNPc, YNKc, YPNc, YPKc, YKNc, YKPc))
  return(YEc)				
}




## get yield estimates using QUEFTS and giving in SN, SP, SK and WLY.  get TSS for the estimates of yield and NOT blup. 
## SN, SP and SK are supposed to be the sum of what the soil originally has and added fertilizer. 
## added fertilizer is provided as it is defined in NOT trials and INS is the one to be defined by iterating on while controling the output
## by the squared yield difference between simulated and measured from NOT  (Blups of NOT)
optim_INS <- function(INS, addedFertilizer, YieldMatrix, WLY){	
  
  SoilN <- INS[1]
  SoilP <- INS[2]
  SoilK <- INS[3]
  
  #Yield_Control <- Yield_S(SN = addedFertilizer[1,1] + SoilN , SP = addedFertilizer[1,2] + SoilP, SK = addedFertilizer[1,3] + SoilK, WLY = WLY)
  Yield_pk <- Yield_S(SN = (addedFertilizer[2,1]*RecoveryFraction$rec_N) + SoilN , 
                      SP = (addedFertilizer[2,2]*RecoveryFraction$rec_P) + SoilP, 
                      SK = (addedFertilizer[2,3]*RecoveryFraction$rec_K) + SoilK, WLY = WLY)
  
  Yield_nk <- Yield_S(SN = (addedFertilizer[3,1]*RecoveryFraction$rec_N) + SoilN ,
                      SP = (addedFertilizer[3,2]*RecoveryFraction$rec_P) + SoilP, 
                      SK = (addedFertilizer[3,3]*RecoveryFraction$rec_K) + SoilK, WLY = WLY)
  
  Yield_np <- Yield_S(SN = (addedFertilizer[4,1]*RecoveryFraction$rec_N) + SoilN , 
                      SP = (addedFertilizer[4,2]*RecoveryFraction$rec_P) + SoilP, 
                      SK = (addedFertilizer[4,3]*RecoveryFraction$rec_K) + SoilK, WLY = WLY)
  #Yield_halfnpk <- Yield_S(SN = addedFertilizer[5,1] + SoilN , SP = addedFertilizer[5,2] + SoilP, SK = addedFertilizer[5,3] + SoilK, WLY = WLY)
  
  
  #control_SS <- (Yield_Control - YieldMatrix$NOT_yield[1])^2
  pk_SS <- (Yield_pk - YieldMatrix$NOT_yield[2])^2
  nk_SS <- (Yield_nk - YieldMatrix$NOT_yield[3])^2
  np_SS <- (Yield_np - YieldMatrix$NOT_yield[4])^2
  #halfnpk_SS <- (Yield_halfnpk - YieldMatrix$NOT_yield[5])^2
  
  TSS <- sum( pk_SS, nk_SS, np_SS)	
  return(TSS)	
}


RecoveryFraction <- data.frame(rec_N=0.5, rec_P=0.21, rec_K=0.49)


NOT_blups <- GIS_BLUP_NOT

soilNPK <- NULL
validation_result <- NULL
for(i in 1:nrow(NOT_blups)){
  oneTrial <- NOT_blups[i,]	
  # WLY <- oneTrial$NPK/3*1000 # convert to kg.ha
  WLY <- oneTrial$DM_NPK # is in kg/ha dry root
  
  addedFertilizer <- as.data.frame(matrix(nrow=5,ncol=3))
  colnames(addedFertilizer) <- c("FN", "FP", "FK")
  addedFertilizer[,1] <- c(0, 0, 150, 150, 75)
  addedFertilizer[,2] <- c(0, 40, 0, 40, 20)
  addedFertilizer[,3] <- c(0, 180, 180, 0, 90)	
  
  YieldMatrix <- as.data.frame(matrix(nrow=5,ncol=2))
  colnames(YieldMatrix) <- c("treatment", "NOT_yield")
  YieldMatrix[,1] <- c("Control", "PK", "NK", "NP", "halfNPK")
  YieldMatrix[,2] <- c(oneTrial$DM_CON, oneTrial$DM_PK, oneTrial$DM_NK, oneTrial$DM_NP, oneTrial$DM_half_NPK)
  
  ## optimizing by min TSS. 
  soil_NPK <- optim(par=c(0,0,0), optim_INS, lower=c(0.1, 0.1, 0.1), method = "L-BFGS-B", addedFertilizer=addedFertilizer, YieldMatrix=YieldMatrix, WLY=WLY)$par	
  
  Yield_halfnpk <- Yield_S(SN = addedFertilizer[5,1] + soil_NPK[1] , SP = addedFertilizer[5,2] + soil_NPK[2], 
                           SK = addedFertilizer[5,3] + soil_NPK[3], WLY = WLY)
  oneTrial$halfNPK_observed <- oneTrial$DM_half_NPK
  oneTrial$halfNPK_estimated <-  Yield_halfnpk
  
  Yield_control <- Yield_S(SN = addedFertilizer[1,1] + soil_NPK[1] , SP = addedFertilizer[1,2] + soil_NPK[2], 
                           SK = addedFertilizer[1,3] + soil_NPK[3], WLY = WLY)
  oneTrial$Control_observed <- oneTrial$DM_CON
  oneTrial$Control_estimated <-  Yield_control
  
  validation_result <- rbind(validation_result, oneTrial)
  
  snpk <- oneTrial
  snpk$soilN <- soil_NPK[1]
  snpk$soilP <- soil_NPK[2]
  snpk$soilK <- soil_NPK[3]
  soilNPK <- rbind(soilNPK, snpk)
}

require(ggplot2)
require(tidyr)

setwd("/home/akilimo/lintul/dataSources/NOT")
saveRDS(validation_result, "validation_result.RDS")
saveRDS(soilNPK, "soilNPK.rds")

ggplot(validation_result, aes(Control_estimated, Control_observed))+
  geom_point()+ 
  #geom_abline(intercept=0, slope=1)+
  geom_smooth(method=lm)+
  stat_density2d()+ 
  xlab(" Control estimated through QUEFTS")+ ylab("Control BLUPS")+
  theme_bw()

ggplot(validation_result, aes(halfNPK_estimated, halfNPK_observed))+
  geom_point()+ 
  geom_abline(intercept=0, slope=1)+
  geom_smooth(method=lm)+
  stat_density2d()+ 
  xlab(" halfNPK estimated through QUEFTS")+ ylab("halfNPK BLUPS")+
  theme_bw()


setwd("/home/akilimo/lintul/dataSources/NOT")
saveRDS(soilNPK, "soilNPK_QUEFTS.RDS")
head(soilNPK)



################################################################################
### get soil grids soil parameters for NOT : 250m resolution
################################################################################
library(parallel)
library(doParallel)
library(foreach)
setwd("/home/akilimo/lintul/dataSources/NOT")
NOT_blups_GPS <- readRDS("soilNPK_QUEFTS.RDS")
setwd("/home/akilimo/lintul")
source("sourceISRIC.R")
coordPoints <- unique(NOT_blups_GPS[, c("lon","lat")])
ISRIC_NOT <- getISRICSoilData(coordPoints = coordPoints, RangeRows = c(1:nrow(coordPoints)),
                              nrCluster = 15, fNames = "ISRICsoilNOT", outputPath = "/home/akilimo/lintul/dataSources/NOT")
head(ISRIC_NOT)


#######################################################################################
## get rainfall data for NOT field ids. if there is no planting date information ake a year before harvest as pplanting date
#######################################################################################
setwd("D:/ACAI_Wrapper/cloud_compute/LINTUL")
soilNPK <- readRDS("soilNPK_QUEFTS.RDS")

source("Rainfall_NASA.R")
NOT16RF <- unique(soilNPK[, c("lon", "lat")])
rownames(NOT16RF) <- NULL
colnames(NOT16RF) <- c("x","y")
extract_rf(year=2016,  leapYear = TRUE, country ="NOT", coordPoints = NOT16RF, 
           outputPath = "D:/ACAI_Wrapper/cloud_compute/LINTUL")
extract_rf(year=2017,  leapYear = FALSE, country ="NOT", coordPoints = NOT16RF, 
           outputPath = "D:/ACAI_Wrapper/cloud_compute/LINTUL")
extract_rf(year=2018,  leapYear = FALSE, country ="NOT", coordPoints = NOT16RF, 
           outputPath = "D:/ACAI_Wrapper/cloud_compute/LINTUL")
extract_rf(year=2019, leapYear = FALSE, country ="NOT", coordPoints = NOT16RF, ## 2019 we do not have complete data from Chirps
           outputPath = "D:/ACAI_Wrapper/cloud_compute/LINTUL")



#######################################################################################
## get NASA data for 2016 - 2019
#######################################################################################
source("Rainfall_NASA.R")
md <- unique(soilNPK[, c("lon", "lat")])
rownames(md) <- NULL
colnames(md) <- c("long", "lat")
library(nasapower)
nrow(md)
## working on the sever
NasaData <- getNasaData(RangeRows = c(1:nrow(md)), nrCluster = 7, startDate = "2016-01-01", endDate = "2019-12-31", 
                        coordPoints = md, fNames = "NasaData_NOT", outputPath = getwd())



#######################################################################################
## run LINTUL
#######################################################################################
## TZ
setwd("D:/ACAI_Wrapper/cloud_compute/LINTUL")
soilNPK <- readRDS("soilNPK_QUEFTS.RDS")
setwd("D:/ACAI_data_download/data")
LIDs <- read.csv("ODK_ONA_IDs.csv")
LIDs1 <- unique(LIDs[, c("fieldID", "plantingDate")])
LIDs1 <- LIDs1[!is.na(LIDs1$plantingDate) & LIDs1$fieldID !="" & !is.na(LIDs1$fieldID), ]

LIDs2 <- unique(LIDs[, c("newFieldID", "plantingDate")])
LIDs2 <- LIDs2[!is.na(LIDs2$plantingDate) & LIDs2$newFieldID != "" & !is.na(LIDs2$newFieldID), ]
colnames(LIDs2) <- colnames(LIDs1)
LIDs <- rbind(LIDs2, LIDs1)
#soilNPK <- merge(soilNPK, LIDs, by="fieldID")
## given an issue with planting date data (not available or illogical) let us use a year from harvet as planting date to get WLY
head(soilNPK)
soilNPK$PlYear <- as.numeric(as.character(soilNPK$harvestYear))-1
soilNPK$Pldate <- paste(soilNPK$harvestDate,"/", soilNPK$harvestMonth, "/", soilNPK$PlYear, sep="")
soilNPK$Pldate <- as.Date(soilNPK$Pldate, format = '%d/%b/%Y')
soilNPK$PlDayNr <- lubridate::yday(soilNPK$Pldate)
## assuming harvest is done rather after 14 months 
for(k in 1:nrow(soilNPK)){
  if(soilNPK$PlDayNr[k] < 62 & soilNPK$PlYear[k] == 2018){
    soilNPK$PlDayNr[k] <- 365 - 62 -  soilNPK$PlDayNr[k]
    soilNPK$PlYear[k] <- 2017
  }else if(soilNPK$PlDayNr[k] < 62 & soilNPK$PlYear[k] == 2017){
    soilNPK$PlDayNr[k] <- 365 - 62 -  soilNPK$PlDayNr[k]
    soilNPK$PlYear[k] <- 2016
  }else if(soilNPK$PlDayNr[k] < 62 & soilNPK$PlYear[k] == 2016){
    soilNPK$PlDayNr[k] <- 1
  }else{
    soilNPK$PlDayNr[k] <- soilNPK$PlDayNr[k] - 62
  }
}

head(soilNPK)
locountry <- unique(soilNPK[, c("country", "location","PlYear" ,"PlDayNr")])
## some locations have multiple harvest ans so planting dates, but they are like one or two days difference so take one of the days



locountry_Adj <- NULL
for (loc in unique(locountry$location)){
  locD <- droplevels(locountry[locountry$location == loc, ])[1,]
  locountry_Adj <- rbind(locountry_Adj, locD)
}

source('lINTUL2_CASSAVA_B.R')
source('lINTUL2_CASSAVA_TZ.R')
LINTUL_WLY_NOT_TZ2 <- runLINTUL_server_NOT(country = "Tanzania", locountry = locountry_Adj)
LINTUL_WLY_NOT_TZ2$location <- paste(LINTUL_WLY_NOT_TZ$lat, LINTUL_WLY_NOT_TZ$long, sep="_") 
saveRDS(LINTUL_WLY_NOT_TZ2, "LINTUL_WLY_NOT_TZ_14M.RDS")
LINTUL_WLY_NOT_TZ2$country <- "Tanzania"




#######################################

setwd("/home/akilimo/projects/SoilNPK/Rwanda")
Rebeca_ISRIC <- read.csv("Rebeca_ISRIC.csv")
Charles_ISRIC <- read.csv("Charles_ISRIC.csv")

head(Charles_ISRIC)

setwd("/home/akilimo/lintul")
source("sourceISRIC.R")


CI <- unique(Charles_ISRIC[, c("geopoint.Longitude", "geopoint.Latitude" )])
names(CI) <- c("lon", "lat")
ISRIC_CI <- NULL
for(i in 1:nrow(CI)){
  isd <- getISRICData(lat = CI$lat[i], lon = CI$lon[i])
  ISRIC_CI <- rbind(ISRIC_CI, isd)
}

Charles_ISRIC$location <- paste(Charles_ISRIC$geopoint.Latitude, Charles_ISRIC$geopoint.Longitude, sep="_")
ISRIC_CI$location <- paste(ISRIC_CI$lat, ISRIC_CI$long, sep="_")

Charles_ISRIC2 <- unique(merge(Charles_ISRIC, ISRIC_CI, by = "location"))
nrow(Charles_ISRIC)
nrow(Charles_ISRIC2)
head(Charles_ISRIC2)

Charles_ISRIC2 <- subset(Charles_ISRIC2, select=-c(long, lat, location))

write.csv(Charles_ISRIC2, "Charles_ISRIC_Data.csv", row.names = FALSE)



RI <- unique(Rebeca_ISRIC[, c("longitude", "latitude" )])
names(RI) <- c("lon", "lat")
ISRIC_RI <- NULL
for(i in 1:nrow(RI)){
  isdd <- getISRICData(lat = RI$lat[i], lon = RI$lon[i])
  ISRIC_RI <- rbind(ISRIC_RI, isdd)
}

Rebeca_ISRIC$location <- paste(Rebeca_ISRIC$X_geopoint_latitude , Rebeca_ISRIC$X_geopoint_longitude, sep="_")
ISRIC_RI$location <- paste(ISRIC_RI$lat, ISRIC_RI$long, sep="_")

Rebeca_ISRIC2 <- unique(merge(Rebeca_ISRIC, ISRIC_RI, by = "location"))
nrow(unique(Rebeca_ISRIC))
nrow(Rebeca_ISRIC2)
head(Rebeca_ISRIC2)
Rebeca_ISRIC2 <- subset(Rebeca_ISRIC2, select = -c(X_geopoint_latitude, X_geopoint_longitude, location))
write.csv(Rebeca_ISRIC2, "Rebeca_ISRIC_Data.csv", row.names = FALSE)




