

list.of.packages <- c("deSolve", "plyr", "lubridate", "parallel", "tidyr", "doParallel", "foreach","randomForest","rpart",
                      "tidyverse", "gtools", "rgdal", "lsmeans", "lme4", "sf", "mlbench", "rgdal","rgeos")


list.of.packages <- c("ggplot2", "emmeans", "lme4", "lmerTest")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)



require(ggplot2)
require(emmeans)
require(lme4)
require(lmerTest)
require(tidyr)
require(plyr)
require(gridExtra)
require(lsmeans)
require(reshape2)
require(raster)
require(rgdal)
require(tidyverse)

# source ODK data from wthin R: PP has done this
# 
# source("D://workspace//ACAI//ODK_functions.R")
# 
# wd <- "D:/workspace/ODK_briefcase_dld"
# setwd(wd)
# 
# forms <- c("Measure_Potato_PO", "Assign_FDTLPO")
# briefCaseDwnld(forms=forms, target=wd, source="ONA")
# 
# ds1 <- dropGroupNames(read.csv("Measure_Potato_PO.csv"))
# ds2 <- dropGroupNames(read.csv("Measure_Potato_PO-plot.csv"))
# ds <- mergeODKforms(ds1, ds2)
# ds$treat <- sub("\\_\\(.*", "", ds$POID2_label)
# ds$treat <- gsub("_rep1", "", ds$treat)
# ds$treat <- gsub("_rep2", "", ds$treat)
# 
# dsY <- ds[grepl("tuberYield", ds$parameters),]
# dsY$tuberY <- dsY$tubersFW / (dsY$plotLength * dsY$plotWidth) * 10
# write.csv(dsY, "D:/workspace/Scaling AKILIMO/Scaling_AKILIMO_yield_data.csv")

###############################################################################
###############################################################################
############### source data from csv file ###############################
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
dsY <- read.csv("/home/akilimo/projects/SoilNPK/Rwanda/Potato_yr2.csv")
length(unique(dsY$FDID2))##58
summary(dsY$tuberY)
hist(dsY$tuberY)

hist(dsY[dsY$treat == "NPK6",]$tuberY)

# data exploration and cleaning
ggplot(dsY, aes(x=treat, y=tuberY)) + 
  geom_violin(draw_quantiles = c(0.25, 0.5, 0.75))+
  geom_jitter(height=0)+
  ylim(0, 75)



summary(dsY$plotLength)
summary(dsY$plotWidth)

dsY$plotLength <- ifelse(dsY$plotLength > 10, dsY$plotLength/10, ifelse(dsY$plotLength==5,4.8, dsY$plotLength)) #correct obvious errors with plot length
dsY$plotWidth <- ifelse(dsY$plotWidth > 10, dsY$plotWidth/10, ifelse(dsY$plotWidth==5,4.8, dsY$plotWidth)) #correct obvious errors with plot width
dsY$tuberY <- dsY$tubersFW / (dsY$plotLength * dsY$plotWidth) * 10

dsY$district <- ifelse(dsY$ENID=='SAENRW000001', 'Burera', ifelse(dsY$ENID=='SAENRW000011', 'Burera', ifelse(dsY$ENID=='SAENRW000003', 'Burera',ifelse(dsY$ENID=='SAENRW000012', 'Burera',ifelse(dsY$ENID=='SAENRW000002', 'Rutsiro', ifelse(dsY$ENID=='SAENRW000006', 'Rutsiro',ifelse(dsY$ENID=='SAENRW000008', 'Rutsiro',ifelse(dsY$ENID=='SAENRW000005', 'Rubavu',ifelse(dsY$ENID=='SAENRW000007', 'Rubavu', ifelse(dsY$ENID=='SAENRW000009', 'Rubavu', '')))))))))) #add districts based on enumerators IDs

dsY$plDens <- dsY$nrPlants / (dsY$plotLength * dsY$plotWidth) # calculate plant density
dsY$YperPlant <- dsY$tuberY / dsY$plDens*10 # calculate yield per plant
summary(dsY$plDens)
summary(dsY$YperPlant)
par(cex.lab=1.5) # is for y-axis
par(cex.axis=1.5) # is for x-axis
boxplot(dsY$plDens ~ FDID2, data=dsY, ylab="Plant density (m-2)", xlab='Field', xaxt="n")
boxplot(dsY$plDens ~ district, data=dsY, ylab="Plant density (m-2)")
boxplot(dsY$YperPlant ~ district, data=dsY, ylab="Tuber yield per plant (kg plant-1)")

par(mar=c(10, 5, 4, 2))
par(mfrow=c(2,2))
boxplot(tuberY ~ treat, data=dsY, ylab="Tuber yield FW (mg/ha)", main="All data", las=2)
boxplot(tuberY ~ treat, data=dsY, subset=dsY$district=='Burera', ylab="Tuber yield FW (mg/ha)", main="Burera", las=2)
boxplot(tuberY ~ treat, data=dsY, subset=dsY$district=='Rubavu', ylab="Tuber yield FW (mg/ha)", main="Rubavu", las=2)
boxplot(tuberY ~ treat, data=dsY, subset=dsY$district=='Rutsiro', ylab="Tuber yield FW (mg/ha)",main="Rutsiro", las=2)

dsY[dsY$district == "Burera" & dsY$tuberY > 55, ]
dsY[dsY$tuberY > 55, ]
plot(dsY$lon, dsY$lat)

plot(dsY$lon, dsY$lat, xlim=c(29.7, 30.0), ylim=c(-1.6, -1.48))

unique(dsY[dsY$lat > -1.51 & dsY$lat <  -1.50 & dsY$lon>29.7, ]$lon)
unique(dsY[dsY$lat > -1.51 & dsY$lat <  -1.50 & dsY$lon>29.81,]$lon)
unique(dsY[dsY$lat < -1.9 & dsY$lat > -1.95  & dsY$lon < 29.47,]$lon)


unique(dsY$cluster)
dsY$cluster <- ifelse(dsY$lat > -1.55 & dsY$lon<29.6, "clus_1", 
                      ifelse(dsY$lat > -1.75 & dsY$lon<29.35, "clus_2",
                             ifelse(dsY$lat > -1.629 & dsY$lat < -1.55& dsY$lon > 29.35 & dsY$lon < 29.45, "clus_14",
                                    ifelse(dsY$lat > -1.65 & dsY$lat < -1.629& dsY$lon > 29.35 & dsY$lon < 29.45, "clus_4",
                                           ifelse(dsY$lat > -1.75 & dsY$lat < -1.7& dsY$lon > 29.35 & dsY$lon < 29.45, "clus_5",
                                                  ifelse(dsY$lat > -1.81 & dsY$lat < -1.75&  dsY$lon < 29.4, "clus_3", 
                                                         ifelse(dsY$lat < -1.81& dsY$lat > -1.86 & dsY$lon > 29.35 & dsY$lon < 29.45, "clus_7",
                                                                ifelse(dsY$lat < -1.9 & dsY$lat > -1.95  & dsY$lon < 29.47, "clus_6",
                                                                       ifelse(dsY$lat < -1.9 & dsY$lat > -2  & dsY$lon > 29.47, "clus_15", 
                                                                              ifelse(dsY$lat > -1.50 & dsY$lon>29.8, "clus_8", 
                                                                                     ifelse(dsY$lat > -1.512 & dsY$lat <  -1.50 & dsY$lon>=29.79 & dsY$lon <= 29.85,"clus_11",
                                                                                            ifelse(dsY$lat < -1.512 & dsY$lat >  -1.525 & dsY$lon<29.85 & dsY$lon>29.8, "clus_12", 
                                                                                                   ifelse(dsY$lat >= -1.575 & dsY$lat < -1.525 & dsY$lon > 29.83, "clus_16",
                                                                                                          ifelse(dsY$lat < -2, "clus_13", "Unknown"))))))))))))))
                                                                                                   
                                                                       
                                                                
                                                                
                                                                                            
                                                                                     
                                                                              


ggclus <- ggplot(dsY, aes(lon, lat, col = factor(cluster)))+
  geom_point(size=2)+
  theme_bw()+
  theme(axis.title = element_text(size=14), axis.text = element_text(size=14),
        legend.position = 'none')
  
ggsave("ggclus_potato.pdf", ggclus)
  

dsYsel <- droplevels(subset(dsY, dsY$district=='Rubavu'|dsY$district=='Rutsiro')) #drop data from Burera (district with disease problems)
dsYsel$cluster <- as.factor(dsYsel$cluster)

gg5 <- ggplot(dsYsel, aes(cluster, tuberY, fill=factor(treat)))+
        geom_boxplot()+
        facet_wrap(~district, scale ="free")+
        theme_bw()+
        theme(axis.title = element_text(size=14), axis.text = element_text(size=14, angle=90),
              strip.text = element_text(size=14))
ggsave("ggclusDist_potato.pdf", gg5)


dsYsel <- unique(dsYsel[, c("lat", "lon", "FDID2","treat", "tuberY", "district","cluster")])

dsnpk <- dsYsel[dsYsel$treat == "NPK11", ]
dsNnpk <- dsYsel[!dsYsel$treat == "NPK11", ]

dsnpkav <- ddply(dsnpk, .(lat,lon,FDID2,district, cluster), summarize, avNPK  = mean(tuberY ))

dsNnpk <- unique(merge(dsNnpk, dsnpkav, by=c("lat","lon","FDID2","district", "cluster")))
head(dsNnpk)
dsNnpk$Ydiff <- dsNnpk$avNPK - dsNnpk$tuberY

ggplot(dsNnpk, aes(avNPK, tuberY, fill=factor(cluster)))+
  geom_boxplot()+
  facet_wrap(~district)
  
ggplot(data=dsNnpk, aes(x=Ydiff, y=treat ,col = district)) +
  facet_grid(district ~ treat) +
   geom_point(size=1.2) 


# data analysis
mod1 <- lmer(tuberY ~ 1 + treat + (1|FDID2), data=dsYsel)
plot(mod1)
#note: transformation of data required... heteroskedastic residuals 

mod2 <- lmer(sqrt(tuberY) ~ treat + (1|FDID2), data=dsYsel)
plot(mod2) # no more heteroskedastici residuals
dsY <- dsY[which(abs(resid(mod2))<2),] 


lsm <- emmeans(mod2, ~treat, lmerTest.limit = 5000)
dif <- contrast(regrid(lsm, transform="response"), "trt.vs.ctrl", ref = 1, adjust="sidak")
dif <- contrast(regrid(lsm, transform="response"), "trt.vs.ctrl", ref = 5, adjust="sidak")

mod3 <- lmer(sqrt(tuberY) ~ treat + (0+treat|FDID2), data=dsYsel)
plot(mod3)
anova(mod2, mod3) #no significant random slope -> indication for no site-specific responses
summary(mod2)

mod4 <- lmer(sqrt(tuberY) ~ treat*district + (1|FDID2), data=dsYsel) #evaluate interaction between treatment and district
plot(mod4)
anova(mod2, mod3)

mod5 <- lmer(sqrt(tuberY) ~ treat*district + (0+treat|FDID2),data=dsYsel)
plot(mod5)
anova(mod4, mod5)


mod6 <- lmer(sqrt(tuberY) ~ treat*district + (0+treat|FDID2) + (1|cluster), data=dsYsel)
plot(mod6)
anova(mod5, mod6)

## get BLUPs

ref.grid(mod6)
lsm6 <- lsmeans(mod6, c("treat", "district"))
lsmDF <- as.data.frame(lsm6)
lsmDF <- lsmDF[,1:3]
names(lsmDF)[1] <- "treat"
names(lsmDF)[3] <- "lsmean"
row.names(lsmDF) <- NULL



ranFD <- ranef(mod6)$FDID2
ranCluster <- ranef(mod6)$cluster
names(ranCluster) <- "clusRanef"
ranFD$FD <- row.names(ranFD)
ranCluster$cluster <- row.names(ranCluster)
ranCluster<- unique(merge(ranCluster, unique(dsYsel[, c("district", "cluster", "FDID2")])))

names(ranFD) <- gsub("treat", "", names(ranFD))
ranFD <- melt(ranFD, 
             measure.vars=c("NPK11","NPK4_DAP2","NPK4_MOP2","NPK4_UREA2","NPK6"),
             variable.name="treat",
             id.vars="FD",
             value.name="blup")

head(ranFD)
rownames(ranFD) <- NULL
ranFD <- merge(ranFD, ranCluster, by.x="FD", by.y="FDID2")
ranFD$blup <- ranFD$blup + ranFD$clusRanef
ranFD <- ranFD[, c("district", "cluster", "FD",  "treat", "blup")]
head(ranFD)


fixRan <- merge(ranFD, lsmDF, by=c("treat", "district"))
fixRan$blup <- fixRan$blup + fixRan$lsmean
fixRan <- subset(fixRan, select=-c(lsmean))
head(fixRan)
fixRan$blup <- (fixRan$blup)^2
summary(fixRan$blup)
summary(dsYsel$tuberY)

fixRan <- unique(fixRan[, c("district","cluster","FD", "treat", "blup")])


fixRanWide <- spread(fixRan, treat, blup)
head(fixRanWide)
fixRanWide <- unique(merge(fixRanWide, dsY[, c("FDID2", "lat", "lon")], by.x="FD", by.y="FDID2"))

saveRDS(fixRanWide, "Potato_fixRanWide_21.RDS")

fixRanWide <- readRDS("Potato_fixRanWide_21.RDS")

longData <- gather(fixRanWide, treat, value, NPK4_DAP2:NPK6)
head(longData)


longData$YieldDiff <- longData$NPK11 - longData$value
unique(longData$treat )
saveRDS(longData, "Potato_longData_21.RDS")

longData <- readRDS("Potato_longData_21.RDS")
head(longData)
length(unique(longData$FD))

gg10 <- ggplot(data=longData, aes(value, y=YieldDiff, col = factor(treat))) +
  facet_grid(district~treat) +
  stat_density2d(geom="path") +
  geom_point(size=1.2) +
  geom_abline(intercept=0,slope=0, col="darkgrey") +
  xlab("") +
  ylab("Yield difference to NPK [t/ha]") +
  theme_bw() +
  theme(axis.text  = element_text(size=12), legend.position = "none",
        axis.title = element_text(size=15), strip.text = element_text(size=12), 
        legend.text =element_text(size=15), legend.title = element_blank())
ggsave("blups_potato_yr2.pdf", gg10)

##########################################################################################
############################## reverse QUEFTS#############################
# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
fixRanWide <- readRDS("Potato_fixRanWide_21.RDS")
head(fixRanWide)




### change fresh matter to dry matter
fixRanWide$DM_NPK <- fixRanWide$NPK11 * 0.21 * 1000
fixRanWide$NPK4_DAP2_DM <- fixRanWide$NPK4_DAP2 * 0.21 * 1000
fixRanWide$NPK4_MOP2_DM <- fixRanWide$NPK4_MOP2 * 0.21 * 1000
fixRanWide$NPK4_UREA2_DM <- fixRanWide$NPK4_UREA2 * 0.21 * 1000
fixRanWide$NPK6_DM <- fixRanWide$NPK6 * 0.21 * 1000
head(fixRanWide)

#RecoveryFraction <- data.frame(rec_N=0.5, rec_P=0.21, rec_K=0.49)## cassava
RecoveryFraction <- data.frame(rec_N=0.41, rec_P=0.25, rec_K=0.65)## potato

## fertilizers added
#NPK171717:  0.17, P_cont = 0.083, K_cont=0.15
#DAP: N_cont = 0.18, P_cont = 0.20, K_cont=0.0
#MOP : N_cont = 0.00, P_cont = 0.00, K_cont=0.50

fert <- rbind(
  data.frame(type = "NPK6", Ncont = 6*50*0.17, Pcont = 6*50*0.083, Kcont= 6 *50*0.15),
  data.frame(type = "NPK4_MOP2", Ncont =4*50*0.17, Pcont = 4*50*0.083, Kcont= (0.5*50*2)+4*50*0.15),
  data.frame(type = "NPK4_UREA2", Ncont =(4*50*0.17 + (2*50*0.46)), Pcont = (4*50*0.083), Kcont= 4*50*0.15),
  data.frame(type = "NPK4_DAP2", Ncont =(4*50*0.17 + (2*50*0.18)), Pcont = (4*50*0.083 + (2*50*0.2)), Kcont= 4*50*0.15),
  data.frame(type = "NPK11", Ncont = 11*50*0.17, Pcont = 11*50*0.083, Kcont= 11*50 *0.15)
)

fert$Ncont <- round(fert$Ncont, digits = 0)
fert$Pcont <- round(fert$Pcont, digits = 0)
fert$Kcont <- round(fert$Kcont, digits = 0)


data.frame(type = c( "NPK6", "NPK4_MOP2",  "NPK4_UREA2",  "NPK4_DAP2", "NPK11"),
                   N = c(51,34,80,52,94), P =c(22,15,15,35,41), K = c(42,78,28,28,78))
addedFertilizer <- fert[fert$type %in% c("NPK6", "NPK4_MOP2", "NPK4_UREA2","NPK4_DAP2"), c("Ncont","Pcont","Kcont") ]
names(addedFertilizer) <- c("FN","FP","FK")


# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
source("QUEFTS_function.R")

# setwd("D:/ACAI_Wrapper/Rwanda/Potato")
# source("QUEFTS_Potato_function.R")

soilNPK <- NULL
validation_result <- NULL

for(i in 1:nrow(fixRanWide)){
  oneTrial <- fixRanWide[i,]	
  WLY <- oneTrial$DM_NPK # is in kg/ha dry root
  

  YieldMatrix <- as.data.frame(matrix(nrow=4,ncol=2))
  colnames(YieldMatrix) <- c("treatment", "NOT_yield")
  YieldMatrix[,1] <- c("Control", "PK", "NK", "NP")
  YieldMatrix[,2] <- c(oneTrial$NPK6_DM, oneTrial$NPK4_MOP2_DM, oneTrial$NPK4_UREA2_DM, oneTrial$NPK4_DAP2_DM )
  
  ## optimizing by min TSS. 
  soil_NPK <- optim(par=c(0,0,0), optim_INS, lower=c(0.1, 0.1, 0.1), method = "L-BFGS-B", 
                    addedFertilizer=addedFertilizer, YieldMatrix=YieldMatrix, WLY=WLY)$par	
  
 
  Yield_control <- Yield_S(SN = (addedFertilizer[1,1]*RecoveryFraction$rec_N) + soil_NPK[1] , 
                           SP = (addedFertilizer[1,2]*RecoveryFraction$rec_P) + soil_NPK[2], 
                           SK = (addedFertilizer[1,3] *RecoveryFraction$rec_K) + soil_NPK[3], WLY = WLY)
  
  
  controlYield <- Yield_S(SN = soil_NPK[1] , 
                          SP = soil_NPK[2], 
                          SK = soil_NPK[3], WLY = WLY)
  
  ## all teh following three are in DM and in kg
    oneTrial$Control_observed <- oneTrial$ NPK6_DM
    oneTrial$Control_estimated <-  Yield_control
    oneTrial$CY_DM <- controlYield
   
  
  validation_result <- rbind(validation_result, oneTrial)
  
  snpk <- oneTrial
  snpk$soilN <- soil_NPK[1]
  snpk$soilP <- soil_NPK[2]
  snpk$soilK <- soil_NPK[3]
  soilNPK <- rbind(soilNPK, snpk)
}


head(soilNPK)
hist(soilNPK$Control_observed - soilNPK$CY)

ggplot(soilNPK, aes(Control_observed - CY_DM, soilN, col=factor(district)))+
  geom_point(size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_bw()+
  theme(axis.title = element_text(size=14), axis.text = element_text(size=14))


ggplot(soilNPK, aes(soilN, Control_estimated, col=factor(district)))+
  geom_point(size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_bw()+
  theme(axis.title = element_text(size=14), axis.text = element_text(size=14))

ggplot(soilNPK, aes(CY_DM, Control_observed, col=factor(district)))+
  geom_point(size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_bw()+
  theme(axis.title = element_text(size=14), axis.text = element_text(size=14))


NPK6_CY <- ggplot(soilNPK, aes(CY_DM, Control_estimated, col=factor(district)))+
  geom_point(size=3)+
  geom_abline(slope=1, intercept=0)+
  xlab("Zero fertilizerfrom reverse QUEFTS, dry matter.")+
  ylab("NPK6 from reverse QUEFTS, dry matter")+
  theme_bw()+
  theme(axis.title = element_text(size=14), axis.text = element_text(size=14))

ggsave("NPK6_zerofertilizer.pdf", NPK6_CY)


saveRDS(validation_result, "validation_result_potatoyr2.RDS")


potato_reverseQUEFTS <- ggplot(validation_result, aes(Control_observed, Control_estimated, col=factor(district)))+
  geom_point(size=3)+
  geom_abline(slope=1, intercept=0)+
  theme_bw()+
  theme(axis.title = element_text(size=14), axis.text = element_text(size=14))

ggsave("Potato.ReverseQUEFTSYr2.pdf", potato_reverseQUEFTS)



ggplot(soilNPK, aes(cluster, soilK))+
  geom_boxplot()

soilNPK <- soilNPK[soilNPK$soilK <=150, ]
# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
saveRDS(soilNPK, "Potato_yr2_soilNPK_GPS.RDS")


# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
validation_result <- readRDS("validation_result_potatoyr2.RDS")
soilNPK_GPS <- readRDS("Potato_yr2_soilNPK_GPS.RDS")
head(soilNPK_GPS)

ggplot(soilNPK_GPS, aes(district, NPK11))+
  geom_boxplot()



NPK11_NPK6 <- ggplot(soilNPK_GPS, aes(district, NPK11 - NPK6))+
  geom_boxplot()

ggsave("NPK11_NPK6.pdf", NPK11_NPK6)

ggNPK11<- ggplot(soilNPK_GPS, aes(district, NPK11))+
  geom_boxplot()
ggsave("NPK11byLoc.pdf", ggNPK11)


soilNPK_GPS$zeroFert <- (soilNPK_GPS$CY_DM / 0.21)/1000

NPK6_0Fert <- ggplot(soilNPK_GPS, aes(zeroFert, NPK6))+
  geom_point()+
  facet_wrap(~district)+
  geom_abline(slope = 1, intercept = 0)+
  xlab("control yield with no fertilizer")+
  ylab("NPK 6 BLUP")+
  theme_bw()
ggsave("NPK6_0Fert.pdf", NPK6_0Fert)

ggplot(soilNPK_GPS, aes(zeroFert, NPK11))+
  geom_point()+
  facet_wrap(~district)+
  geom_abline(slope = 1, intercept = 0)+
  xlab("control yield with no fertilizer")+
  ylab("NPK 6 BLUP")+
  theme_bw()





summary(soilNPK_GPS$soilN)
summary(soilNPK_GPS$soilP)
summary(soilNPK_GPS$soilK)

quantile(soilNPK_GPS$NPK6_DM, probs=seq(0,1,0.1))
quantile(soilNPK_GPS$Control_estimated, probs=seq(0,1,0.1))
quantile(soilNPK_GPS$soilN, probs=seq(0,1,0.1))

head(soilNPK_GPS)




library(plotly)

plot_ly(
  soilNPK_GPS, x= ~lon, y= ~lat, z= ~soilN,
  type='mesh3d', intensity = ~soilN,
  colors= colorRamp(rainbow(5))
)


### add the other variables such as organic fertilizer use ...
Potato_moreVar <- read.csv("/home/akilimo/projects/SoilNPK/Rwanda/Potato_moreVar.csv")

soilNPK_GPS_Vars <- unique(merge(soilNPK_GPS, Potato_moreVar, by.x="FD", by.y="FDID2"))
nrow(soilNPK_GPS_Vars)
nrow(soilNPK_GPS)

###  get isrics data
setwd("D:/ACAI_Wrapper/cloud_compute/LINTUL")
source("sourceISRIC.R")


get_ISIRIC_AOI <- function(AOI_GPS){
  ISRIC_AOI <- NULL
  for(i in 1:nrow(AOI_GPS)){
    onePoint <- getISRICData(lat = AOI_GPS$lat[i], lon = AOI_GPS$lon[i], trialID = AOI_GPS$trialID[i])
    ISRIC_AOI <- rbind(ISRIC_AOI, onePoint)
  }
  return(ISRIC_AOI)
}


AOI_GPS <- unique(soilNPK_GPS[, c("FD", "lon", "lat")])
str(AOI_GPS)
names(AOI_GPS) <- c("trialID", "lon", "lat")
POt_RW_ISRIC <- get_ISIRIC_AOI(AOI_GPS = AOI_GPS)
str(POt_RW_ISRIC)
# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")

POt_RW_ISRIC2 <- subset(POt_RW_ISRIC, select = -c(soilN, soilP, soilK, NPK6, FD, 
                                                  positionLandscape,slope,yearsSinceFallow,clearing,
                                                  ploughing,ridging,fertilizer,organicInput, lime,
                                                  localSoilNameTranslated, fertilityFarmer, fertilityResearcher,
                                                  fallow,previousCrop))

saveRDS(POt_RW_ISRIC2, "Potato_RW_ISRIC_212.RDS")




soilNPK_GPS <- readRDS("Potato_yr2_soilNPK_GPS.RDS")
POt_RW_ISRIC <- readRDS("Potato_RW_ISRIC_212.RDS")
soilNPK_GPS$CON <- (soilNPK_GPS$CY_DM/0.21)/1000


POt_RW_ISRIC <- unique(merge(POt_RW_ISRIC, unique(soilNPK_GPS[, c("lon", "lat", "soilN", "soilP", "soilK", "CON", "FD")])), by=c("lon","lat"))
nrow(POt_RW_ISRIC)
str(POt_RW_ISRIC)

POt_RW_ISRIC %>% str()
POt_RW_ISRIC$trialID <- as.factor(POt_RW_ISRIC$trialID)
POt_RW_ISRIC$ncluster <- as.factor(POt_RW_ISRIC$ncluster)
# POt_RW_ISRIC$positionLandscape <- as.factor(POt_RW_ISRIC$positionLandscape)
# POt_RW_ISRIC$slope <- as.factor(POt_RW_ISRIC$slope)
# POt_RW_ISRIC$yearsSinceFallow <- as.factor(POt_RW_ISRIC$yearsSinceFallow)
# POt_RW_ISRIC$clearing <- as.factor(POt_RW_ISRIC$clearing)
# POt_RW_ISRIC$ploughing <- as.factor(POt_RW_ISRIC$ploughing)
# POt_RW_ISRIC$ridging <- as.factor(POt_RW_ISRIC$ridging)
# POt_RW_ISRIC$fertilizer <- as.factor(POt_RW_ISRIC$fertilizer)
# POt_RW_ISRIC$organicInput <- as.factor(POt_RW_ISRIC$organicInput)
# POt_RW_ISRIC$lime <- as.factor(POt_RW_ISRIC$lime)
# POt_RW_ISRIC$localSoilNameTranslated <- as.factor(POt_RW_ISRIC$localSoilNameTranslated)
# POt_RW_ISRIC$fertilityFarmer <- as.factor(POt_RW_ISRIC$fertilityFarmer)
# POt_RW_ISRIC$fertilityResearcher <- as.factor(POt_RW_ISRIC$fertilityResearcher)
# POt_RW_ISRIC$fallow <- as.factor(POt_RW_ISRIC$fallow)
# POt_RW_ISRIC$previousCrop <- as.factor(POt_RW_ISRIC$previousCrop)
POt_RW_ISRIC$FD <- as.factor(POt_RW_ISRIC$FD)
POt_RW_ISRIC <- POt_RW_ISRIC %>% mutate_if(is.character, as.numeric)
POt_RW_ISRIC %>% str()

summary(POt_RW_ISRIC$P_Mehlich)

# 
# POt_RW_ISRIC <- POt_RW_ISRIC[, c("FD","soilN", "soilP", "soilK", "exchK", "P_Mehlich", "Clay_5","Clay_15","Clay_30","percentSOM_5","percentSOM_15","percentSOM_30",
#                                     "pH_5","pH_15","pH_30", "silt_5", "silt_15", "silt_30","BD_5", "BD_15", "BD_30", "CEC_5","CEC_15","CEC_30", "percentSOC_5",
#                                     "percentSOC_15", "percentSOC_30", "FC_5", "FC_15", "FC_30", "wp_5", "wp_15", "wp_30", "sws_5", "sws_15", "sws_30","Sand_5", "Sand_15",
#                                     "Sand_30", "TotalN", "Mn","B", "Ca","Fe", "Cu","Al", "Mg", "Na","ncluster", "NPK6",
#                                  "positionLandscape","slope","yearsSinceFallow","clearing",
#                                  "ploughing","ridging","fertilizer","organicInput", "lime",
#                                  "localSoilNameTranslated", "fertilityFarmer", "fertilityResearcher",
#                                  "fallow","previousCrop")]

POt_RW_ISRIC <- POt_RW_ISRIC[, c("FD","soilN", "soilP", "soilK", "exchK", "P_Mehlich", "Clay_5","Clay_15","Clay_30","percentSOM_5","percentSOM_15","percentSOM_30",
                                 "pH_5","pH_15","pH_30", "silt_5", "silt_15", "silt_30","BD_5", "BD_15", "BD_30", "CEC_5","CEC_15","CEC_30", "percentSOC_5",
                                 "percentSOC_15", "percentSOC_30", "FC_5", "FC_15", "FC_30", "wp_5", "wp_15", "wp_30", "sws_5", "sws_15", "sws_30","Sand_5", "Sand_15",
                                 "Sand_30", "TotalN", "Mn","B", "Ca","Fe", "Cu","Al", "Mg", "Na","ncluster", "CON")]

POt_RW_ISRIC$CON <- round(POt_RW_ISRIC$CON, digits=0)




## get iSDA data for the trial locations
# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
PlandCropCover <- readRDS("potato_isda_cover.RDS")
RW_missing_landCover <-  readRDS("RW_isda_cover_missing.RDS")
PlandCropCover <- rbind(PlandCropCover, RW_missing_landCover)

PlandCropCover <- PlandCropCover[ , c("slope_angle", "fcc", "lat", "lon")]
PlandCropCover <- unique(merge(PlandCropCover, soilNPK_GPS[, c("FD", "lat", "lon")]), by=c("lon", "lat"))
length(unique(PlandCropCover$FD))

ACAI_isda <- readRDS("potato_isda.RDS")




ACAI_isda <- unique(merge(soilNPK_GPS[, c("lon", "lat", "soilN", "soilP", "soilK", "CON", "FD")], ACAI_isda, by.y=c("FDID2","lon", "lat"), by.x=c("FD","lon", "lat")))
length(unique(ACAI_isda$FDID2))

### link the blups of the control
ACAI_isda$CON <- round(ACAI_isda$CON , digits=0)
hist(ACAI_isda$CON)


head(ACAI_isda)
ACAI_isda$Variable <- paste(ACAI_isda$Variable, ACAI_isda$depth, sep="_")
ACAI_isda_wide <- spread(ACAI_isda[, c("lat","lon", "FD", "Variable", "CON", "value", "soilN", "soilP", "soilK")], Variable, value, )
colnames(ACAI_isda_wide) <- gsub("-", "_", colnames(ACAI_isda_wide))
head(ACAI_isda_wide)

ACAI_isda_wide <- unique(merge(ACAI_isda_wide, PlandCropCover, by=c("lat", "lon", "FD")))
unique(ACAI_isda_wide$fcc)

head(ACAI_isda_wide)
str(ACAI_isda_wide)
## covert to numeric 
ACAI_isda_wide %>% str()
ACAI_isda_wide$texture_class_0_20 <- as.factor(ACAI_isda_wide$texture_class_0_20)
ACAI_isda_wide$texture_class_20_50 <- as.factor(ACAI_isda_wide$texture_class_20_50)
ACAI_isda_wide$FD <- as.factor(ACAI_isda_wide$FD)
ACAI_isda_wide$fcc <- as.factor(ACAI_isda_wide$fcc)
# ACAI_isda_wide$positionLandscape <- as.factor(ACAI_isda_wide$positionLandscape)
# ACAI_isda_wide$slope <- as.factor(ACAI_isda_wide$slope)
# ACAI_isda_wide$yearsSinceFallow <- as.factor(ACAI_isda_wide$yearsSinceFallow)
# ACAI_isda_wide$clearing <- as.factor(ACAI_isda_wide$clearing)
# ACAI_isda_wide$ploughing <- as.factor(ACAI_isda_wide$ploughing)
# ACAI_isda_wide$ridging <- as.factor(ACAI_isda_wide$ridging)
# ACAI_isda_wide$fertilizer <- as.factor(ACAI_isda_wide$fertilizer)
# ACAI_isda_wide$organicInput <- as.factor(ACAI_isda_wide$organicInput)
# ACAI_isda_wide$lime <- as.factor(ACAI_isda_wide$lime)
# ACAI_isda_wide$localSoilNameTranslated <- as.factor(ACAI_isda_wide$localSoilNameTranslated)
# ACAI_isda_wide$fertilityFarmer <- as.factor(ACAI_isda_wide$fertilityFarmer)
# ACAI_isda_wide$fertilityResearcher <- as.factor(ACAI_isda_wide$fertilityResearcher)
# ACAI_isda_wide$fallow <- as.factor(ACAI_isda_wide$fallow)
# ACAI_isda_wide$previousCrop <- as.factor(ACAI_isda_wide$previousCrop)
ACAI_isda_wide <- ACAI_isda_wide %>% mutate_if(is.character, as.numeric)
ACAI_isda_wide %>% str()

### get organic matter
ACAI_isda_wide$soilOM_0_20  <- ACAI_isda_wide$carbon_organic_0_20  * 2
ACAI_isda_wide$soilOM_20_50  <- ACAI_isda_wide$carbon_organic_20_50  * 2

ACAI_isda_wide$percentSOM_0_20 <- ACAI_isda_wide$soilOM_0_20/10
ACAI_isda_wide$percentSOM_20_50 <- ACAI_isda_wide$soilOM_20_50/10



## use pedo-transfer equations and get FC WP and SC for the two depth apart
##### WP ######
ACAI_isda_wide$wp_0_20A <- (-0.024*ACAI_isda_wide$sand_content_0_20/100) + 0.487*ACAI_isda_wide$clay_content_0_20/100 + 0.006*ACAI_isda_wide$percentSOM_0_20 + 
  0.005*(ACAI_isda_wide$sand_content_0_20/100 *ACAI_isda_wide$percentSOM_0_20) - 0.013*(ACAI_isda_wide$clay_content_0_20/100 * ACAI_isda_wide$percentSOM_0_20) + 
  0.068*(ACAI_isda_wide$sand_content_0_20/100 * ACAI_isda_wide$clay_content_0_20/100 ) + 0.031
ACAI_isda_wide$wp_0_20 <- (ACAI_isda_wide$wp_0_20A + (0.14 * ACAI_isda_wide$wp_0_20A - 0.02))*100

ACAI_isda_wide$wp_20_50A <- (-0.024*ACAI_isda_wide$sand_content_20_50/100) + 0.487*ACAI_isda_wide$clay_content_20_50/100 + 0.006*ACAI_isda_wide$percentSOM_20_50 + 
  0.005*(ACAI_isda_wide$sand_content_20_50/100 *ACAI_isda_wide$percentSOM_20_50) - 0.013*(ACAI_isda_wide$clay_content_20_50/100 * ACAI_isda_wide$percentSOM_20_50) + 
  0.068*(ACAI_isda_wide$sand_content_20_50/100 * ACAI_isda_wide$clay_content_20_50/100 ) + 0.031
ACAI_isda_wide$wp_20_50 <- (ACAI_isda_wide$wp_20_50A + (0.14 * ACAI_isda_wide$wp_20_50A - 0.02))*100

ACAI_isda_wide <- subset(ACAI_isda_wide, select=-c(wp_0_20A, wp_20_50A))



##### FC ######
ACAI_isda_wide$FC_0_20_A <- -0.251*ACAI_isda_wide$sand_content_0_20/100 + 0.195*ACAI_isda_wide$clay_content_0_20/100 + 0.011*ACAI_isda_wide$percentSOM_0_20 + 
  0.006*(ACAI_isda_wide$sand_content_0_20/100 * ACAI_isda_wide$percentSOM_0_20) - 0.027*(ACAI_isda_wide$clay_content_0_20/100 * ACAI_isda_wide$percentSOM_0_20) + 
  0.452*(ACAI_isda_wide$sand_content_0_20/100 * ACAI_isda_wide$clay_content_0_20/100) + 0.299
ACAI_isda_wide$FC_0_20 <- (ACAI_isda_wide$FC_0_20_A + (1.283 * ACAI_isda_wide$FC_0_20_A^2 - 0.374 * ACAI_isda_wide$FC_0_20_A - 0.015))*100


ACAI_isda_wide$FC_20_50_A <- -0.251*ACAI_isda_wide$sand_content_20_50/100 + 0.195*ACAI_isda_wide$clay_content_20_50/100 + 0.011*ACAI_isda_wide$percentSOM_20_50 + 
  0.006*(ACAI_isda_wide$sand_content_20_50/100 * ACAI_isda_wide$percentSOM_20_50) - 0.027*(ACAI_isda_wide$clay_content_20_50/100 * ACAI_isda_wide$percentSOM_20_50) + 
  0.452*(ACAI_isda_wide$sand_content_20_50/100 * ACAI_isda_wide$clay_content_20_50/100) + 0.299
ACAI_isda_wide$FC_20_50 <- (ACAI_isda_wide$FC_20_50_A + (1.283 * ACAI_isda_wide$FC_20_50_A^2 - 0.374 * ACAI_isda_wide$FC_20_50_A - 0.015))*100


ACAI_isda_wide <- subset(ACAI_isda_wide, select=-c(FC_0_20_A, FC_20_50_A))


##### soil water at ######
ACAI_isda_wide$sws_0_20_A <- 0.278*(ACAI_isda_wide$sand_content_0_20/100)+0.034*(ACAI_isda_wide$clay_content_0_20/100)+0.022*ACAI_isda_wide$percentSOM_0_20-
  0.018*(ACAI_isda_wide$sand_content_0_20/100*ACAI_isda_wide$percentSOM_0_20)-
  0.027*(ACAI_isda_wide$clay_content_0_20/100*ACAI_isda_wide$percentSOM_0_20)-0.584*(ACAI_isda_wide$sand_content_0_20/100*ACAI_isda_wide$clay_content_0_20/100)+0.078

ACAI_isda_wide$sws_0_20_B <- (ACAI_isda_wide$sws_0_20_A +(0.636*ACAI_isda_wide$sws_0_20_A-0.107))*100
ACAI_isda_wide$sws_0_20 <- (ACAI_isda_wide$FC_0_20/100+ACAI_isda_wide$sws_0_20_B/100-(0.097*ACAI_isda_wide$sand_content_0_20/100)+0.043)*100


ACAI_isda_wide$sws_20_50_A <- 0.278*(ACAI_isda_wide$sand_content_20_50/100)+0.034*(ACAI_isda_wide$clay_content_20_50/100)+0.022*ACAI_isda_wide$percentSOM_20_50-
  0.018*(ACAI_isda_wide$sand_content_20_50/100*ACAI_isda_wide$percentSOM_20_50)-
  0.027*(ACAI_isda_wide$clay_content_20_50/100*ACAI_isda_wide$percentSOM_20_50)-0.584*(ACAI_isda_wide$sand_content_20_50/100*ACAI_isda_wide$clay_content_20_50/100)+0.078
ACAI_isda_wide$sws_20_50_B <- (ACAI_isda_wide$sws_20_50_A +(0.636*ACAI_isda_wide$sws_20_50_A-0.107))*100
ACAI_isda_wide$sws_20_50 <- (ACAI_isda_wide$FC_20_50/100+ACAI_isda_wide$sws_20_50_B/100-(0.097*ACAI_isda_wide$sand_content_20_50/100)+0.043)*100


ACAI_isda_wide <- subset(ACAI_isda_wide, select=-c(sws_0_20_A, sws_0_20_B, sws_20_50_A, sws_20_50_B))
str(ACAI_isda_wide)
ACAI_isda_wide$location <- paste(ACAI_isda_wide$lon, ACAI_isda_wide$lat, sep="_")





## Random firest model with 0 - 5 - 10 - 15 - 20  or 0 - 7.5 - 15 - 22.5 - 30 yeild level
## create the contol classes
### create classes of control
ACAI_isda_wide$CONclass1 <- ifelse(ACAI_isda_wide$CON < 5, "class1",
                                          ifelse(ACAI_isda_wide$CON >= 5 & ACAI_isda_wide$CON < 10, "class2",
                                                 ifelse(ACAI_isda_wide$CON >= 10 & ACAI_isda_wide$CON < 15, "class3",
                                                        ifelse(ACAI_isda_wide$CON >= 15 & ACAI_isda_wide$CON < 20, "class4", "class5"))))
ACAI_isda_wide$CONclass1 <- as.factor(ACAI_isda_wide$CONclass1)



ACAI_isda_wide$CONclass2 <- ifelse(ACAI_isda_wide$CON < 7.5, "class1",
                                   ifelse(ACAI_isda_wide$CON >= 7.5 & ACAI_isda_wide$CON < 15, "class2",
                                          ifelse(ACAI_isda_wide$CON >= 15 & ACAI_isda_wide$CON < 22.5, "class3",
                                                 ifelse(ACAI_isda_wide$CON >= 22.5 & ACAI_isda_wide$CON < 30, "class4", "class5"))))
ACAI_isda_wide$CONclass2 <- as.factor(ACAI_isda_wide$CONclass2)




POt_RW_ISRIC$CONclass1 <- ifelse(POt_RW_ISRIC$CON < 5, "class1",
                                   ifelse(POt_RW_ISRIC$CON >= 5 & POt_RW_ISRIC$CON < 10, "class2",
                                          ifelse(POt_RW_ISRIC$CON >= 10 & POt_RW_ISRIC$CON < 15, "class3",
                                                 ifelse(POt_RW_ISRIC$CON >= 15 & POt_RW_ISRIC$CON < 20, "class4", "class5"))))
POt_RW_ISRIC$CONclass1 <- as.factor(POt_RW_ISRIC$CONclass1)



POt_RW_ISRIC$CONclass2 <- ifelse(POt_RW_ISRIC$CON < 7.5, "class1",
                                   ifelse(POt_RW_ISRIC$CON >= 7.5 & POt_RW_ISRIC$CON < 15, "class2",
                                          ifelse(POt_RW_ISRIC$CON >= 15 & POt_RW_ISRIC$CON < 22.5, "class3",
                                                 ifelse(POt_RW_ISRIC$CON >= 22.5 & POt_RW_ISRIC$CON < 30, "class4", "class5"))))
POt_RW_ISRIC$CONclass2 <- as.factor(POt_RW_ISRIC$CONclass2)

head(ACAI_isda_wide)


# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
saveRDS(ACAI_isda_wide, "Pot_RW_isda.RDS")
saveRDS(POt_RW_ISRIC, "POt_RW_ISRIC.RDS")

ACAI_isda_wide <- readRDS("Pot_RW_isda.RDS")
POt_RW_ISRIC <- readRDS("POt_RW_ISRIC.RDS")
# 
# ACAI_isda_wide <- subset(ACAI_isda_wide, select=-c(positionLandscape,slope,yearsSinceFallow,clearing,
#                                                    ploughing,ridging,fertilizer,organicInput, lime,
#                                                    localSoilNameTranslated, fertilityFarmer, fertilityResearcher,
#                                                    fallow,previousCrop))
# POt_RW_ISRIC <- subset(POt_RW_ISRIC, select = -c(positionLandscape,slope,yearsSinceFallow,clearing,
#                                                  ploughing,ridging,fertilizer,organicInput, lime,
#                                                  localSoilNameTranslated, fertilityFarmer, fertilityResearcher,
#                                                  fallow,previousCrop))


set.seed(444)
indexdata <- sample(2, nrow(ACAI_isda_wide), replace=TRUE, prob=c(0.7,0.3))## where conrtol yield is used as a covariate

iSDA_train_P <- ACAI_isda_wide[indexdata == 1, ]
iSDA_test_P <- ACAI_isda_wide[indexdata == 2, ]

isric_train_P <- POt_RW_ISRIC[indexdata == 1, ]
isric_test_P <- POt_RW_ISRIC[indexdata == 2, ]


iSDA_train_P_con1 <-unique(subset(iSDA_train_P, select=-c(lat, lon, FD, CON, CONclass2, location, fcc, slope_angle)))
iSDA_test_P_con1 <- unique(subset(iSDA_test_P, select=-c(lat, lon, FD, CON, CONclass2, location, fcc, slope_angle)))

iSDA_train_P_con2 <-unique(subset(iSDA_train_P, select=-c(lat, lon, FD, CON, CONclass1, location, fcc, slope_angle)))
iSDA_test_P_con2 <- unique(subset(iSDA_test_P, select=-c(lat, lon, FD, CON, CONclass1, location, fcc, slope_angle)))


isric_train_P_con1 <-unique(subset(isric_train_P, select=-c(CON, CONclass2, FD)))
isric_test_P_con1 <- unique(subset(isric_test_P, select=-c(CON, CONclass2, FD)))

isric_train_P_con2 <-unique(subset(isric_train_P, select=-c(CON, CONclass1, FD)))
isric_test_P_con2 <- unique(subset(isric_test_P, select=-c(CON, CONclass1, FD)))




iSDA_train_P_con1_N <- subset(iSDA_train_P_con1, select=-c(soilP, soilK))
iSDA_train_P_con1_P <- subset(iSDA_train_P_con1, select=-c(soilN, soilK))
iSDA_train_P_con1_K <- subset(iSDA_train_P_con1, select=-c(soilN, soilP))

iSDA_test_P_con1_N <- subset(iSDA_test_P_con1, select=-c(soilP, soilK))
iSDA_test_P_con1_P <- subset(iSDA_test_P_con1, select=-c(soilN, soilK))
iSDA_test_P_con1_K <- subset(iSDA_test_P_con1, select=-c(soilN, soilP))


iSDA_train_P_con2_N <- subset(iSDA_train_P_con2, select=-c(soilP, soilK))
iSDA_train_P_con2_P <- subset(iSDA_train_P_con2, select=-c(soilN, soilK))
iSDA_train_P_con2_K <- subset(iSDA_train_P_con2, select=-c(soilN, soilP))

iSDA_test_P_con2_N <- subset(iSDA_test_P_con2, select=-c(soilP, soilK))
iSDA_test_P_con2_P <- subset(iSDA_test_P_con2, select=-c(soilN, soilK))
iSDA_test_P_con2_K <- subset(iSDA_test_P_con2, select=-c(soilN, soilP))



isric_train_P_con1_N <- subset(isric_train_P_con1, select=-c(soilP, soilK))
isric_train_P_con1_P <- subset(isric_train_P_con1, select=-c(soilN, soilK))
isric_train_P_con1_K <- subset(isric_train_P_con1, select=-c(soilN, soilP))

isric_test_P_con1_N <- subset(isric_test_P_con1, select=-c(soilP, soilK))
isric_test_P_con1_P <- subset(isric_test_P_con1, select=-c(soilN, soilK))
isric_test_P_con1_K <- subset(isric_test_P_con1, select=-c(soilN, soilP))



isric_train_P_con2_N <- subset(isric_train_P_con2, select=-c(soilP, soilK))
isric_train_P_con2_P <- subset(isric_train_P_con2, select=-c(soilN, soilK))
isric_train_P_con2_K <- subset(isric_train_P_con2, select=-c(soilN, soilP))

isric_test_P_con2_N <- subset(isric_test_P_con2, select=-c(soilP, soilK))
isric_test_P_con2_P <- subset(isric_test_P_con2, select=-c(soilN, soilK))
isric_test_P_con2_K <- subset(isric_test_P_con2, select=-c(soilN, soilP))



require(gtools)
require(caret)
require(randomForest)
custom <- trainControl(method="oob", number=10)


set.seed(773)

Ndata_Train <- iSDA_train_P_con1_N##0.56, 0.55
Ndata_Valid <- iSDA_test_P_con1_N

Ndata_Train <- iSDA_train_P_con2_N##0.64
Ndata_Valid <- iSDA_test_P_con2_N


Ndata_Train <- isric_train_P_con1_N##0.24, 0.37
Ndata_Valid <- isric_test_P_con1_N

Ndata_Train <- isric_train_P_con2_N##0.40, 0.51
Ndata_Valid <- isric_test_P_con2_N


set.seed(773)
rm(RF_N1)
RF_N1 <- randomForest(soilN ~ ., Ndata_Train, importance=TRUE, ntree=1000)
Ndata_Valid$predN <- predict(RF_N1, Ndata_Valid)
ACAI_isda_wide$predN <- predict(RF_N1, ACAI_isda_wide)

Ndata_Train <- subset(Ndata_Train, select=-c(aluminium_extractable_0_20, aluminium_extractable_20_50,
                                             zinc_extractable_0_20, zinc_extractable_20_50,
                                             calcium_extractable_0_20, calcium_extractable_20_50 ))
dotplot(Ndata_Train$soilN) ##37
dotplot(ACAI_isda_wide$predN) ## 46

i=41
names(Ndata_Valid[i])
names(ACAI_isda_wide[i])
par(mfrow=c(2,2))
hist(Ndata_Valid[,i])
hist(ACAI_isda_wide[,i])
summary(ACAI_isda_wide[,i])
summary(ACAI_isda_wide[,i])



sst <- sum((ACAI_isda_wide$soilN - mean(ACAI_isda_wide$soilN))^2)
sse <- sum((ACAI_isda_wide$soilN - ACAI_isda_wide$predN)^2)
rsq <- 1 - sse / sst
rsq

varImp(RF_N1)



ggN <- ggplot(ACAI_isda_wide, aes(soilN, predN))+
  geom_point(size=2)+
  geom_abline(slope=1, intercept = 0)+
  theme_bw()+
  theme(axis.title = element_text(size=14),axis.text = element_text(size=14))

ggsave("soilN_Potato_JK.pdf", ggN)

summary(ACAI_isda_wide$soilN)
summary(ACAI_isda_wide$predN)

trialSite_N <- ACAI_isda_wide[, c("soilN", "predN")]
names(trialSite_N) <- c("QUEFTS_N", "RF_N")
require(tidyverse)
trialSite_Nl <- gather(trialSite_N,nprof, value ,QUEFTS_N:RF_N)


ggplot(trialSite_Nl, aes(nprof, value, fill=nprof))+
  geom_boxplot()+
  theme_bw()+
  theme(legend.title = element_blank())


# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
fixRanWide <- readRDS("Potato_fixRanWide_21.RDS")

ACAI_isda_wide2 <- unique(merge(ACAI_isda_wide, fixRanWide[, c("lat","lon", "FD", "district")], by=c("lat","lon")))
head(ACAI_isda_wide2)

ggN_district <- ggplot(ACAI_isda_wide2, aes(soilN, predN))+
  geom_point(size=2)+
  geom_abline(slope=1, intercept = 0)+
  facet_wrap(~district)+
  theme_bw()+
  theme(axis.title = element_text(size=14),axis.text = element_text(size=14))

ggsave('ggN_district_trialLoc.pdf',ggN_district)

Pdata_Train <- iSDA_train_P_con1_P##0.15
Pdata_Valid <- iSDA_test_P_con1_P

Pdata_Train <- iSDA_train_P_con2_P##0.15
Pdata_Valid <- iSDA_test_P_con2_P


Pdata_Train <- isric_train_P_con1_P##0.30
Pdata_Valid <- isric_test_P_con1_P

Pdata_Train <- isric_train_P_con2_P##0.31
Pdata_Valid <- isric_test_P_con2_P


set.seed(548)
rm(RF_P1)
RF_P1 <- randomForest(soilP ~ ., Pdata_Train, importance=TRUE, ntree=1000)
Pdata_Valid$predP <- predict(RF_P1, Pdata_Valid)
ACAI_isda_wide$predP <- predict(RF_P1, ACAI_isda_wide)

sst <- sum((ACAI_isda_wide$soilP - mean(ACAI_isda_wide$soilP))^2)
sse <- sum((ACAI_isda_wide$soilP - ACAI_isda_wide$predP)^2)
rsq <- 1 - sse / sst
rsq


ggP <- ggplot(ACAI_isda_wide, aes(soilP, predP))+
  geom_point(size=2)+
  geom_abline(slope=1, intercept = 0)+
  theme_bw()+
  theme(axis.title = element_text(size=14),axis.text = element_text(size=14))

ggsave("soilP_Potato_JK.pdf", ggP)


trialSite_P <- ACAI_isda_wide[, c("soilP", "predP")]
names(trialSite_P) <- c("QUEFTS_P", "RF_P")
trialSite_P <- gather(trialSite_P, Pprof, value ,QUEFTS_P:RF_P)

summary(ACAI_isda_wide$predP)
summary(ACAI_isda_wide$soilP)

ggplot(trialSite_P, aes(Pprof, value, fill=Pprof))+
  geom_boxplot()+
  theme_bw()+
  theme(legend.title = element_blank())

dotplot(Pdata_Train$soilP)
dotplot(ACAI_isda_P$predP)





Kdata_Train <- iSDA_train_P_con1_K##0.48
Kdata_Valid <- iSDA_test_P_con1_K

Kdata_Train <- iSDA_train_P_con2_K##0.51
Kdata_Valid <- iSDA_test_P_con2_K

Kdata_Train <- isric_train_P_con1_K## 0.43
Kdata_Valid <- isric_test_P_con1_K

Kdata_Train <- isric_train_P_con2_K##0.45
Kdata_Valid <- isric_test_P_con2_K

set.seed(548)
rm(RF_K1)
RF_K1 <- randomForest(soilK ~ ., Kdata_Train, importance=TRUE, ntree=1000)
Kdata_Valid$predK <- predict(RF_K1, Kdata_Valid)
ACAI_isda_wide$predK <- predict(RF_K1, ACAI_isda_wide)

sst <- sum((ACAI_isda_wide$soilK - mean(ACAI_isda_wide$soilK))^2)
sse <- sum((ACAI_isda_wide$soilK - ACAI_isda_wide$predK)^2)
rsq <- 1 - sse / sst
rsq


ggK <- ggplot(ACAI_isda_wide, aes(soilK, predK))+
  geom_point(size=2)+
  geom_abline(slope=1, intercept = 0)+
  theme_bw()+
  theme(axis.title = element_text(size=14),axis.text = element_text(size=14))

ggsave("soilK_Potato_JK.pdf", ggK)



trialSite_K <- ACAI_isda_wide[, c("soilK", "predK")]
names(trialSite_K) <- c("QUEFTS_K", "RF_K")
trialSite_K <- gather(trialSite_K, Kprof, value ,QUEFTS_K:RF_K)

ggplot(trialSite_K, aes(Kprof, value, fill=Kprof))+
  geom_boxplot()+
  theme_bw()+
  theme(legend.title = element_blank())




###################################################
### ACAI_isda_wide has reverse QUEFTS soil NPK and RF soil NPK for trial sites
### get estimates of control and NPK 6 yield



source("QUEFTS_function.R")
RecoveryFraction <- data.frame(rec_N=0.41, rec_P=0.25, rec_K=0.65)## potato
RF_RQ <- NULL
for(i in 1:nrow(ACAI_isda_wide)){
  onesite <- ACAI_isda_wide[i,]	
  WLY <- 8400 
  
  
  onesite$NPK6_RQ <- Yield_S(SN = (51*RecoveryFraction$rec_N) + onesite$soilN , 
                            SP = (25*RecoveryFraction$rec_P) + onesite$soilP, 
                            SK = (45 *RecoveryFraction$rec_K) + onesite$soilK, WLY = WLY)
  
  
  onesite$NPK6_RF <- Yield_S(SN = (51*RecoveryFraction$rec_N) + onesite$predN , 
                           SP = (25*RecoveryFraction$rec_P) + onesite$predP, 
                           SK = (45 *RecoveryFraction$rec_K) + onesite$predK, WLY = WLY)
  
  
  onesite$CY_RQ <- Yield_S(SN = onesite$soilN , 
                          SP = onesite$soilP, 
                          SK = onesite$soilK, WLY = WLY)
  
  onesite$CY_RF <- Yield_S(SN = onesite$predN , 
                           SP = onesite$predP, 
                           SK = onesite$predK, WLY = WLY)
  
  RF_RQ <- rbind(RF_RQ, onesite)
  
}

ggplot(RF_RQ, aes(NPK_RQ, NPK_RF))+
  geom_point()+ xlim(1500,9000)+ ylim(1500,9000)+
  geom_abline(intercept = 0, slope=1)+
  theme_bw()

ggplot(RF_RQ, aes(CY_RQ, CY_RF))+
  geom_point()+ #xlim(1500,9000)+ ylim(1500,9000)+
  geom_abline(intercept = 0, slope=1)+
  theme_bw()



RF_NPK6_CY <- ggplot(RF_RQ, aes(CY_RF/0.21, NPK6_RF/0.21))+
  geom_point()+ #xlim(0,8000)+ ylim(0,8000)+
  geom_abline(intercept = 0, slope=1)+
  theme_bw()

ggsave("RF_NPK6_CY.pdf", RF_NPK6_CY)

ggplot(RF_RQ, aes(CY_RQ, NPK_RQ))+
  geom_point()+ xlim(1500,9000)+ ylim(1500,9000)+
  geom_abline(intercept = 0, slope=1)+
  theme_bw()


hist(RF_RQ$CON)

head(soilNPK_GPS)

ggplot(soilNPK_GPS, aes(district, soilN))+
  geom_boxplot()

ggplot(soilNPK_GPS, aes(district, NPK11))+
  geom_boxplot()


ggplot(soilNPK_GPS, aes(zeroFert, NPK11))+
  geom_point()+
  facet_wrap(~district)+
  geom_abline(slope=1, intercept = 0)


######################################################
## get soil NPK for Ritsro and Rubavu


Rfmodel_Wrapper <- function(FCY){
  require(randomForest)
  require(caret)
  require(tidyverse)
  ## training set
  # setwd("D:/ACAI_Wrapper/Rwanda")
  setwd("/home/akilimo/projects/SoilNPK/Rwanda")
  isda_train <- readRDS("Pot_RW_isda.RDS")
  isda_train <- subset(isda_train, select=-c(FD, CONclass2, CONclass1,location, fcc, slope_angle))
  
  isda_train <- isda_train %>% mutate_if(is.character, as.numeric)
  isda_train %>% str()
  
  isda_train$CONclass <- ifelse(isda_train$CON < 7.5, "class1",
                                ifelse(isda_train$CON >= 7.5 & isda_train$CON < 15, "class2",
                                       ifelse(isda_train$CON >= 15 & isda_train$CON < 22.5, "class3",
                                              ifelse(isda_train$CON >= 22.5 & isda_train$CON < 30, "class4", "class5"))))
  
  isda_train$CONclass <- as.factor(isda_train$CONclass)
  
  
  
  #prediction set
  # setwd("D:/ACAI_Wrapper/Rwanda/Potato")
  setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
  isda_predSet <- readRDS("RW_isda_H2O2dapth.RDS")
  RW_isda_H2O2dapth_missing <- readRDS("RW_isda_H2O2dapth_missing.RDS")
  isda_predSet$location <- paste(isda_predSet$lat, isda_predSet$lon, sep="_")
  RW_isda_H2O2dapth_missing$location <- paste(RW_isda_H2O2dapth_missing$lat, RW_isda_H2O2dapth_missing$lon, sep="_")
  
  RW_isda_H2O2dapth_missing <- RW_isda_H2O2dapth_missing[, colnames(isda_predSet)]
  isda_predSet <- unique(rbind(isda_predSet, RW_isda_H2O2dapth_missing))
  nrow(unique(isda_predSet))##834

  isda_predSet$soilOM_0_20  <- isda_predSet$carbon_organic_0_20  * 2
  isda_predSet$soilOM_20_50  <- isda_predSet$carbon_organic_20_50  * 2
  isda_predSet$CON <- FCY
  
  isda_predSet$CONclass <- ifelse(isda_predSet$CON < 7.5, "class1",
                                  ifelse(isda_predSet$CON >= 7.5 & isda_predSet$CON < 15, "class2",
                                         ifelse(isda_predSet$CON >= 15 & isda_predSet$CON < 22.5, "class3",
                                                ifelse(isda_predSet$CON >= 22.5 & isda_predSet$CON < 30, "class4", "class5"))))
  
  isda_predSet$CONclass <- as.factor(isda_predSet$CONclass)
  isda_predSet$soilN <- 0
  isda_predSet$soilP <- 0
  isda_predSet$soilK <- 0
  isda_predSet_regions <- unique(isda_predSet[, c("lat","lon", "NAME_1", "NAME_2", "location")])
  isda_predSet <- unique(isda_predSet[, c("lat","lon","CON","soilN","soilP","soilK",
                                   "aluminium_extractable_0_20","aluminium_extractable_20_50",
                                   "bulk_density_0_20","bulk_density_20_50",
                                   "calcium_extractable_0_20","calcium_extractable_20_50",
                                   "carbon_organic_0_20","carbon_organic_20_50","carbon_total_0_20",
                                   "carbon_total_20_50","cation_exchange_capacity_0_20",
                                   "cation_exchange_capacity_20_50", "clay_content_0_20",
                                   "clay_content_20_50","iron_extractable_0_20",
                                   "iron_extractable_20_50","magnesium_extractable_0_20",
                                   "magnesium_extractable_20_50", "nitrogen_total_0_20",
                                   "nitrogen_total_20_50","ph_0_20", "ph_20_50",
                                   "phosphorous_extractable_0_20", "phosphorous_extractable_20_50",
                                   "potassium_extractable_0_20","potassium_extractable_20_50",
                                   "sand_content_0_20","sand_content_20_50","silt_content_0_20",
                                   "silt_content_20_50", "stone_content_0_20","stone_content_20_50",
                                   "sulphur_extractable_0_20", "sulphur_extractable_20_50",
                                   "texture_class_0_20","texture_class_20_50",
                                   "zinc_extractable_0_20","zinc_extractable_20_50","soilOM_0_20",
                                   "soilOM_20_50","percentSOM_0_20","percentSOM_20_50",
                                   "wp_0_20","wp_20_50","FC_0_20","FC_20_50","sws_0_20",
                                   "sws_20_50","CONclass" )])
  nrow(isda_predSet)
  
  
  isda_predSet$use <- "Valid"
  isda_train$use <- "train"
  factoring <- rbind(isda_train, isda_predSet)
  
  isda_train <- factoring[factoring$use == "train", ]
  isda_train <- subset(isda_train, select=-c(use))
  
  isda_predSet <- factoring[factoring$use == "Valid", ]
  isda_predSet <- subset(isda_predSet, select=-c(use))
  
  
  Ndata_Train  <- subset(isda_train, select=-c(soilP, soilK))
  Pdata_Train  <- subset(isda_train, select=-c(soilN, soilK))
  Kdata_Train  <- subset(isda_train, select=-c(soilN, soilP))
  
  
  ## Coustome control parameter 
  #custom <- trainControl(method="repeatedcv", number=10, repeats=5, verboseIter=TRUE)
  require(caret)
  custom <- trainControl(method="oob", number=10)
  ##########################################################################
  ## Random Forest soilN:
  ##########################################################################
  set.seed(444)
  RF_N1 <- randomForest(soilN ~ ., subset(Ndata_Train, select = -c(CON, lat, lon)), importance=TRUE, ntree=1000)
  # vimpN <- as.data.frame(varImp(RF_N1))
  # vimpN$vars <- row.names(vimpN)
  # vimpN[order(vimpN$Overall), ]
  # 
  # 
  
  ##########################################################################
  ## Random Forest "soilP" 
  ##########################################################################
  set.seed(773)
  RF_P1 <- randomForest(soilP ~ ., subset(Pdata_Train, select=-c(CON, lat, lon)),importance=TRUE, ntree=1000)
  
  ##########################################################################
  ## Random Forest soilK" R sq. 0.60 if control is used, 0.29 otherwise
  ##########################################################################
  set.seed(773)
  RF_K1 <- randomForest(soilK ~ ., subset(Kdata_Train, select=-c(CON, lat, lon)), importance=TRUE, ntree=1000)
  
  ##########################################################################
  ## use the random forest model and get the soil NPK estimates for the whole area
  ##########################################################################
  # a.	-1.575, 29.325
  # b.	-1.625, 29.325
  # c.	-1.625, 29.375
  # d.	-1.625, 29.425 
  # e.	-2.075,  29.475
  # f.	-2.075, 29, 525
  
  isda_predSet <- unique(isda_predSet)
  nrow(isda_predSet)
  isda_predSet$soilN <-  predict(RF_N1, isda_predSet)
  isda_predSet$soilP <-  predict(RF_P1, isda_predSet)
  isda_predSet$soilK <-  predict(RF_K1, isda_predSet)
  
  nrow(isda_predSet)
  
  Ndata_Train$pred_soilN <- predict(RF_N1, Ndata_Train)
  Pdata_Train$pred_soilP <- predict(RF_P1, Pdata_Train)
  Kdata_Train$pred_soilK <- predict(RF_K1, Kdata_Train)
  
  
  dotplot(Ndata_Train$soilN)
  dotplot(Ndata_Train$pred_soilN)
  dotplot(isda_predSet$soilN)
  
  dotplot(Pdata_Train$soilP)
  dotplot(Pdata_Train$pred_soilP)
  dotplot(isda_predSet$soilP)
  
  dotplot(Kdata_Train$pred_soilK)
  dotplot(Kdata_Train$soilK)
  dotplot(isda_predSet$soilK)
  
  par(mfrow=c(3,2))
  hist(Ndata_Train$soilN)
  hist(isda_predSet$soilN)
  
  hist(Pdata_Train$soilP)
  hist(isda_predSet$soilP)
  
  hist(Kdata_Train$soilK)
  hist(isda_predSet$soilK)
  nrow(isda_predSet)
  
  soilN_NA <- isda_predSet[is.na(isda_predSet$soilN), ]
  
  isda_predSet <- isda_predSet[!is.na(isda_predSet$soilN), ]
  nrow(isda_predSet_regions)
  nrow(isda_predSet)
  isda_predSet <- unique(merge(isda_predSet, isda_predSet_regions, by=c("lat", "lon")))
  
  isda_predSet <- isda_predSet[, c("location","lat","lon","soilN" ,"soilP", "soilK", "CON","CONclass","NAME_1","NAME_2")]
  str(isda_predSet)
  return(isda_predSet)
}
  

soilNPK_FCY1_RW <- Rfmodel_Wrapper(FCY = 3)##18.03   27.51   29.07   33.15   31.35  125.38      44
soilNPK_FCY2_RW <- Rfmodel_Wrapper(FCY = 12)
soilNPK_FCY3_RW <- Rfmodel_Wrapper(FCY = 18)
soilNPK_FCY4_RW <- Rfmodel_Wrapper(FCY = 26)
soilNPK_FCY5_RW <- Rfmodel_Wrapper(FCY = 30)

nrow(soilNPK_FCY2_RW)

PotatoRegion <- c("Nyabihu", "Rubavu", "Gicumbi", "Musanze", "Burera","Rutsiro")
soilNPK_FCY2_RW_target  <- soilNPK_FCY2_RW[soilNPK_FCY2_RW$NAME_2 %in% PotatoRegion, ]

solN_target <- ggplot(soilNPK_FCY2_RW_target, aes(NAME_2, soilN)) +
  geom_boxplot() +
  theme_bw()
  ggsave("solN_targetArea.pdf", solN_target)
  


summary(soilNPK_FCY1_RW[, c("soilN", "soilP", "soilK")])##
summary(soilNPK_FCY2_RW[, c("soilN", "soilP", "soilK")])
summary(soilNPK_FCY3_RW[, c("soilN", "soilP", "soilK")])
summary(soilNPK_FCY4_RW[, c("soilN", "soilP", "soilK")])
summary(soilNPK_FCY5_RW[, c("soilN", "soilP", "soilK")])

# setwd("D:/ACAI_Wrapper/Rwanda/Potato")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
saveRDS(soilNPK_FCY1_RW, "soilNPK_FCY1_RW.RDS")
saveRDS(soilNPK_FCY2_RW, "soilNPK_FCY2_RW.RDS")
saveRDS(soilNPK_FCY3_RW, "soilNPK_FCY3_RW.RDS")
saveRDS(soilNPK_FCY4_RW, "soilNPK_FCY4_RW.RDS")
saveRDS(soilNPK_FCY5_RW, "soilNPK_FCY5_RW.RDS")

#### checking out how the RF model is working

head(soilNPK_FCY1_RW)

soilNPKTarget <- rbind(soilNPK_FCY1_RW, soilNPK_FCY2_RW, soilNPK_FCY3_RW,
                       soilNPK_FCY4_RW, soilNPK_FCY5_RW)
soilNPKTarget$FCY <- soilNPKTarget$CONclass


soilN_Target <- soilNPKTarget[, c("FCY", "soilN")]
colnames(soilN_Target) <- c("nprof", "value")
NN <- rbind(trialSite_Nl, soilN_Target)

NN$nprof <- factor(NN$nprof, levels=c("QUEFTS_N", "RF_N", "class1",
                                      "class2", "class3", "class4", "class5"))
ggallN <- ggplot(NN, aes(nprof, value, fill=nprof))+
  geom_boxplot()+
  xlab("predicted soil N") +
  theme_bw()+
  theme(axis.text = element_text(size=14), axis.title = element_text(size=14),
        legend.position = "none")

ggsave("soilN_allversions.pdf", ggallN)

soilP_Target <- soilNPKTarget[, c("FCY", "soilP")]
colnames(soilP_Target) <- c("Pprof", "value")
PP <- rbind(trialSite_P, soilP_Target)
PP$Pprof <- factor(PP$Pprof, levels=c("QUEFTS_P", "RF_P", "class1",
                                      "class2", "class3", "class4", "class5"))
ggallP <- ggplot(PP, aes(Pprof, value, fill=Pprof))+
  geom_boxplot()+
  xlab("predicted soil P") +
  theme_bw()+
  theme(axis.text = element_text(size=14), axis.title = element_text(size=14),
        legend.position = "none")

ggsave("soilP_allversions.pdf", ggallP)


soilK_Target <- soilNPKTarget[, c("FCY", "soilK")]
colnames(soilK_Target) <- c("Kprof", "value")
KK <- rbind(trialSite_K, soilK_Target)
KK$Kprof <- factor(KK$Kprof, levels=c("QUEFTS_K", "RF_K", "class1",
                                      "class2", "class3", "class4", "class5"))
ggallK <- ggplot(KK, aes(Kprof, value, fill=Kprof))+
  geom_boxplot()+
  xlab("predicted soil K") +
  theme_bw()+
  theme(axis.text = element_text(size=14), axis.title = element_text(size=14),
        legend.position = "none")

ggsave("soilK_allversions.pdf", ggallK)

##############################################################################
### get current yield assuming 80% of NOT trial yield as WLY = 35 ton
# setwd("D:/ACAI_Wrapper/Rwanda/Potato")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
source("QUEFTS_Potato_function.R")
# longData <- readRDS("Potato_longData_21.RDS")
# head(longData)
# quantile(longData$NPK11, probs=seq(0,1,0.1))##40 ton

### GET soil NPK
soilNPK_FCY1_RW <- readRDS("soilNPK_FCY1_RW.RDS")
soilNPK_FCY2_RW <- readRDS( "soilNPK_FCY2_RW.RDS")
soilNPK_FCY3_RW <- readRDS( "soilNPK_FCY3_RW.RDS")
soilNPK_FCY4_RW <- readRDS( "soilNPK_FCY4_RW.RDS")
soilNPK_FCY5_RW <- readRDS( "soilNPK_FCY5_RW.RDS")

head(soilNPK_FCY1_RW)

ggplot(soilNPK_FCY1_RW, aes(soilN))+
  geom_histogram()

ggplot(soilNPK_FCY1_RW, aes(lon, lat, fill=soilP))+
  geom_tile()

ggplot(soilNPK_FCY1_RW, aes(lon, lat, fill=soilK))+
  geom_tile()


getCY_pot <- function(soilD){
  SoilData <- soilD
  SoilData$rel_N <- 1
  SoilData$water_limited_yield <- 8400#40*1000*0.21
  head(SoilData)
  SoilData <- SoilData[!is.na(SoilData$soilN), ]
  
  WLY_CY_FCY1_RW_PO <- NULL ## run X
  for(k in 1:nrow(SoilData)){
    print(k)
    soilDD <- SoilData[k, ]
    CY <- Yield_S(SN = soilDD$soilN*0.41,
                  SP = soilDD$soilP*0.25,
                  SK = soilDD$soilK*0.65, WLY = soilDD$water_limited_yield)
    soilDD$Current_Yield <- CY
    # soilDD <- QUEFTS_WLYCY_Potato(SoilData_onePix=soilDD) # in kg/ha dry

    
    WLY_CY_FCY1_RW_PO <- rbind(WLY_CY_FCY1_RW_PO, soilDD)
  }
  return(WLY_CY_FCY1_RW_PO)
}

## FCY 1: dry wt
WLY_CY_FCY1_RW_PO <- getCY_pot(soilD = soilNPK_FCY1_RW)
saveRDS(WLY_CY_FCY1_RW_PO, "RW_Pot_CY_FCY1.RDS")

WLY_CY_FCY2_RW_PO <- getCY_pot(soilD = soilNPK_FCY2_RW)
saveRDS(WLY_CY_FCY2_RW_PO, "RW_Pot_CY_FCY2.RDS")

WLY_CY_FCY3_RW_PO <- getCY_pot(soilD = soilNPK_FCY3_RW)
saveRDS(WLY_CY_FCY3_RW_PO, "RW_Pot_CY_FCY3.RDS")

WLY_CY_FCY4_RW_PO <- getCY_pot(soilD = soilNPK_FCY4_RW)
saveRDS(WLY_CY_FCY4_RW_PO, "RW_Pot_CY_FCY4.RDS")

WLY_CY_FCY5_RW_PO <- getCY_pot(soilD = soilNPK_FCY5_RW)
saveRDS(WLY_CY_FCY5_RW_PO, "RW_Pot_CY_FCY5.RDS")


#### testing the CY generation
# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
source("QUEFTS_function.R")

getCY <- function(soilD){
  zeroFertYield <- NULL
  for(i in 1:nrow(soilD)){
    print(i)
    oneTrial <- soilD[i,]	
    WLY <- 8400
    
    controlYield <- Yield_S(SN = oneTrial$soilN , 
                            SP = oneTrial$soilP, 
                            SK = oneTrial$soilK, WLY = WLY)
    oneTrial$Current_Yield <- controlYield
    oneTrial$water_limited_yield <- 8400
    oneTrial$WLY <- 8400 / 0.21
    oneTrial$CY <- oneTrial$Current_Yield / 0.21
    zeroFertYield <- rbind(zeroFertYield, oneTrial)
    
  }
  return(zeroFertYield)
}



WLY_CY_FCY1_RW_PO <- getCY(soilD = soilNPK_FCY1_RW)
saveRDS(WLY_CY_FCY1_RW_PO, "RW_Pot_CY_FCY1.RDS")

WLY_CY_FCY2_RW_PO <- getCY(soilD = soilNPK_FCY2_RW)
saveRDS(WLY_CY_FCY2_RW_PO, "RW_Pot_CY_FCY2.RDS")

WLY_CY_FCY3_RW_PO <- getCY(soilD = soilNPK_FCY3_RW)
saveRDS(WLY_CY_FCY3_RW_PO, "RW_Pot_CY_FCY3.RDS")

WLY_CY_FCY4_RW_PO <- getCY(soilD = soilNPK_FCY4_RW)
saveRDS(WLY_CY_FCY4_RW_PO, "RW_Pot_CY_FCY4.RDS")

WLY_CY_FCY5_RW_PO <- getCY(soilD = soilNPK_FCY5_RW)
saveRDS(WLY_CY_FCY5_RW_PO, "RW_Pot_CY_FCY5.RDS")



#####################################

summary(WLY_CY_FCY1_RW_PO$Current_Yield)
summary(WLY_CY_FCY2_RW_PO$Current_Yield)
summary(WLY_CY_FCY3_RW_PO$Current_Yield)
summary(WLY_CY_FCY4_RW_PO$Current_Yield)
summary(WLY_CY_FCY5_RW_PO$Current_Yield)


summary(WLY_CY_FCY1_RW_PO$soilN)
summary(WLY_CY_FCY2_RW_PO$soilN)
summary(WLY_CY_FCY3_RW_PO$soilN)


## generating fertilizer recommendation

# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")

## ## both WLY and CY are dry matter
WLY_CY_FCY1_RW_PO <- readRDS("RW_Pot_CY_FCY1.RDS")
WLY_CY_FCY2_RW_PO <- readRDS("RW_Pot_CY_FCY2.RDS")
WLY_CY_FCY3_RW_PO <- readRDS("RW_Pot_CY_FCY3.RDS")
WLY_CY_FCY4_RW_PO <- readRDS("RW_Pot_CY_FCY4.RDS")
WLY_CY_FCY5_RW_PO <- readRDS("RW_Pot_CY_FCY5.RDS")

# 
# WLY_CY_FCY1_RW_PO$WLY <- WLY_CY_FCY1_RW_PO$water_limited_yield / 0.21
# WLY_CY_FCY1_RW_PO$CY <- WLY_CY_FCY1_RW_PO$Current_Yield / 0.21
# 
# WLY_CY_FCY2_RW_PO$WLY <- WLY_CY_FCY2_RW_PO$water_limited_yield / 0.21
# WLY_CY_FCY2_RW_PO$CY <- WLY_CY_FCY2_RW_PO$Current_Yield / 0.21
# 
# WLY_CY_FCY3_RW_PO$WLY <- WLY_CY_FCY3_RW_PO$water_limited_yield / 0.21
# WLY_CY_FCY3_RW_PO$CY <- WLY_CY_FCY3_RW_PO$Current_Yield / 0.21
# 
# WLY_CY_FCY4_RW_PO$WLY <- WLY_CY_FCY4_RW_PO$water_limited_yield / 0.21
# WLY_CY_FCY4_RW_PO$CY <- WLY_CY_FCY4_RW_PO$Current_Yield / 0.21
# 
# WLY_CY_FCY5_RW_PO$WLY <- WLY_CY_FCY5_RW_PO$water_limited_yield / 0.21
# WLY_CY_FCY5_RW_PO$CY <- WLY_CY_FCY5_RW_PO$Current_Yield / 0.21
# 
head(soilNPK_GPS)
summary(soilNPK_GPS$zeroFert)
summary(WLY_CY_FCY1_RW_PO[, c("CY")])/1000
summary(WLY_CY_FCY2_RW_PO[, c("CY")])/1000
summary(WLY_CY_FCY3_RW_PO[, c("CY")])/1000
summary(WLY_CY_FCY4_RW_PO[, c("CY")])/1000
summary(WLY_CY_FCY5_RW_PO[, c("CY")])/1000


summary(soilNPK_GPS[soilNPK_GPS$district == "Rubavu", ]$soilN)
summary(WLY_CY_FCY1_RW_PO[WLY_CY_FCY1_RW_PO$NAME_2 == "Rubavu", ]$soilN)

summary(soilNPK_GPS[soilNPK_GPS$district == "Rubavu", ]$soilP)
summary(WLY_CY_FCY1_RW_PO[WLY_CY_FCY1_RW_PO$NAME_2 == "Rubavu", ]$soilP)

summary(soilNPK_GPS[soilNPK_GPS$district == "Rubavu", ]$soilK)
summary(WLY_CY_FCY1_RW_PO[WLY_CY_FCY1_RW_PO$NAME_2 == "Rubavu", ]$soilK)

summary(soilNPK_GPS[soilNPK_GPS$district == "Rubavu", ]$zeroFert)
summary(WLY_CY_FCY1_RW_PO[WLY_CY_FCY1_RW_PO$NAME_2 == "Rubavu", ]$CY/1000)



summary(WLY_CY_FCY1_RW_PO[, c("water_limited_yield", "Current_Yield", "CY", "WLY")])
summary(WLY_CY_FCY2_RW_PO[, c("water_limited_yield", "Current_Yield", "CY", "WLY")])
summary(WLY_CY_FCY3_RW_PO[, c("water_limited_yield", "Current_Yield", "CY", "WLY")])
summary(WLY_CY_FCY4_RW_PO[, c("water_limited_yield", "Current_Yield", "CY", "WLY")])
summary(WLY_CY_FCY5_RW_PO[, c("water_limited_yield", "Current_Yield", "CY", "WLY")])


hist(WLY_CY_FCY1_RW_PO$Current_Yield)
hist(WLY_CY_FCY1_RW_PO$soilN)
quantile(WLY_CY_FCY1_RW_PO$soilN, probs=seq(0,1,0.1))
quantile(WLY_CY_FCY1_RW_PO$soilP, probs=seq(0,1,0.1))
quantile(WLY_CY_FCY1_RW_PO$soilK, probs=seq(0,1,0.1))

WLY_CY_FCY1_RW_PO[WLY_CY_FCY1_RW_PO$Current_Yield < 5000,]
WLY_CY_FCY1_RW_PO[WLY_CY_FCY1_RW_PO$soilN > 25,]


str(WLY_CY_FCY2_RW_PO)
unique(WLY_CY_FCY2_RW_PO$NAME_2)##Rutsiro, Rubavu,Nyabihu,Musanze,Burera,Gicumbi,Ngororero,Nyamagabe,Nyaruguru,Karongi    

# setwd("D:/ACAI_Wrapper/Rwanda/Potato")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
source("QUEFTS_Potato_function.R")
fertilizer_Potato <- fertilizerFunc(ureaavailable=TRUE, ureaCostperBag=0,ureaBagWt=50,
                              MOPavailable=TRUE, MOPCostperBag=0, MOPBagWt=50,
                              DAPavailable=TRUE, DAPCostperBag=0, DAPBagWt=50,
                              TSPavailable=TRUE, TSPCostperBag=0, TSPBagWt=50,
                              NPK171717available=TRUE, NPK171717CostperBag=0, NPK171717BagWt=50, 
                              country="RW")



fertilizer_Potato <- fertilizer_Potato[fertilizer_Potato$type %in% c("Urea", "DAP", "NPK17_17_17"), ]
fertilizer_Potato$price <- c(564, 633, 713)


# maxInv <- 300000 # 3000 FRw/are = 300000 RWF/ha  validation trial season 1 ## 300 USD
maxInv <- 213000 # 2130 FRw/are =213000 RWF/ha  validation trial season 2 ##213 USD

rootUP <- 200 ##200 FRw/kg = 




# setwd("D:/ACAI_Wrapper/Rwanda/Potato")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
isda_predSet <- readRDS("RW_isda_H2O2dapth.RDS")
RW_isda_H2O2dapth_missing <- readRDS("RW_isda_H2O2dapth_missing.RDS")
isda_predSet$location <- paste(isda_predSet$lat, isda_predSet$lon, sep="_")
RW_isda_H2O2dapth_missing$location <- paste(RW_isda_H2O2dapth_missing$lat, RW_isda_H2O2dapth_missing$lon, sep="_")
isda_predSet <- rbind(isda_predSet, RW_isda_H2O2dapth_missing)





## FRrecom
loop_recom <- function(WLY_CY_Data){
  Recom_FCY2 <- NULL
  for (j in 1: nrow(WLY_CY_Data)){
    print(j)
    WLY_CY <- WLY_CY_Data[j,]
    WLY <- WLY_CY$water_limited_yield
    
    reco_J <- getFRrecommendations_potato(maxInv = maxInv, 
                                          fertilizers=fertilizer_Potato,
                                          rootUP = rootUP, 
                                          areaHa = 1,
                                          WLY_CY = WLY_CY, WLY=WLY)
    if(is.null(reco_J$fertilizer_rates) ){
      rr1 <- reco_J$rec
      rr1$Urea <- 0
      rr1$NPK17_17_17 <- 0
      rr1$DAP <- 0
      rr1 <- rr1[, c("lat","lon","N", "P", "K", "CONclass","NAME_1","NAME_2", "WLY","CY",
                     "TargetY", "TC", "NR","Urea", "NPK17_17_17", "DAP")]
      Recom_FCY2 <- rbind(Recom_FCY2, rr1)
    }else{
      tt <- spread(reco_J$fertilizer_rates, type, rate)
      if(is.null(tt$NPK17_17_17)){
        tt$NPK17_17_17 <- 0
      }
      if(is.null(tt$Urea)){
        tt$Urea <- 0
      }
      if(is.null(tt$DAP)){
        tt$DAP <- 0
      }
      tt <- tt[, c("Urea","NPK17_17_17", "DAP")]
      rr1 <- cbind(reco_J$rec, tt)
    }
    Recom_FCY2 <- rbind(Recom_FCY2, rr1)
  }
  return(Recom_FCY2)
}


Recom_FCY1 <- loop_recom(WLY_CY_Data = WLY_CY_FCY1_RW_PO)

Recom_FCY2 <- loop_recom(WLY_CY_Data = WLY_CY_FCY2_RW_PO)

Recom_FCY3 <- loop_recom(WLY_CY_Data = WLY_CY_FCY3_RW_PO)

Recom_FCY4 <- loop_recom(WLY_CY_Data = WLY_CY_FCY4_RW_PO)

Recom_FCY5 <- loop_recom(WLY_CY_Data = WLY_CY_FCY5_RW_PO)


Recom_FCY2[Recom_FCY2$TC > 214000,]
Recom_FCY3[Recom_FCY3$TC > 214000,]
Recom_FCY4[Recom_FCY4$TC > 214000,]
Recom_FCY5[Recom_FCY5$TC > 214000,]


#### missing points
missing_points <- unique(isda_predSet[!isda_predSet$location %in% Recom_FCY1$location, ])
missing_points <- unique(missing_points[, c("lat", "lon", "location")])
missing_points <- rbind(missing_points,
                        data.frame(lat=c(-1.425,-1.475,-1.475,-1.475,-1.525,-2.025, -1.775, -1.975, -1.925, -1.875, -2.125), 
                                   lon=c(29.775,29.775,29.925,30.625,30.725,29.325, 29.225, 29.325, 29.325, 29.325, 30.325),
                                   location = c("-1.425_29.775","-1.475_29.775",
                                                "-1.475_29.925", "-1.475_30.625",
                                                "-1.525_30.725","-2.025_29.325",
                                                "-1.775_29.225","-1.975_29.325",
                                                "-1.925_29.325","-1.875_29.325",
                                                "-2.125_30.325")))


mm <- missing_points[missing_points$location %in% 
                       c("-1.425_29.775","-1.475_29.775",
                         "-1.475_29.925", "-1.475_30.625",
                         "-1.525_30.725",
                         "-2.025_29.325","-1.775_29.225",
                         "-1.975_29.325",
                         "-1.925_29.325",
                         "-1.875_29.325","-2.125_30.325"), ]
mm$col <- "green"
ppl <- Recom_FCY1[, c("lon", "lat")]
ppl$col="red"

kk <- rbind(ppl, mm[, c("lon", "lat", "col")])

require(ggplot2)
ggplot(kk, aes(lon, lat, col=factor(col)))+
  geom_point()+
  geom_point(data = kk[kk$col == "green",], size=2)
mm <- unique(mm)

miss_fc1 <- NULL
miss_fc2 <- NULL
miss_fc3 <- NULL
miss_fc4 <- NULL
miss_fc5 <- NULL

for(i in 1:nrow(mm)){
  mm1 <- mm[i,]
  la <- c(mm1$lat, mm1$lat + 0.05, mm1$lat - 0.05)
  lo <- c(mm1$lon, mm1$lon + 0.05, mm1$lon - 0.05)
  dd <- data.frame(expand.grid(la, lo)) 
  dd$location <- paste(dd$Var1, dd$Var2, sep="_")
  
  fc1 <-  WLY_CY_FCY1_RW_PO[WLY_CY_FCY1_RW_PO$location %in% dd$location, ]
  fc1$soilN <- mean(fc1$soilN)
  fc1$soilP <- mean(fc1$soilP)
  fc1$soilK <- mean(fc1$soilK)
  fc1$Current_Yield <- mean(fc1$Current_Yield)
  tt1<- fc1[1,]
  tt1$CY <- tt1$Current_Yield/0.21
  tt1$lat <- mm1$lat
  tt1$lon <- mm1$lon
  tt1$location <- mm1$location
  miss_fc1 <- rbind(miss_fc1, tt1)
  
  
  fc2 <-  WLY_CY_FCY2_RW_PO[WLY_CY_FCY2_RW_PO$location %in% dd$location, ]
  fc2$soilN <- mean(fc2$soilN)
  fc2$soilP <- mean(fc2$soilP)
  fc2$soilK <- mean(fc2$soilK)
  fc2$Current_Yield <- mean(fc2$Current_Yield)
  tt2<- fc2[1,]
  tt2$CY <- tt2$Current_Yield/0.21
  tt2$lat <- mm1$lat
  tt2$lon <- mm1$lon
  tt2$location <- mm1$location
  miss_fc2 <- rbind(miss_fc2, tt2)
  
  fc3 <-  WLY_CY_FCY3_RW_PO[WLY_CY_FCY3_RW_PO$location %in% dd$location, ]
  fc3$soilN <- mean(fc3$soilN)
  fc3$soilP <- mean(fc3$soilP)
  fc3$soilK <- mean(fc3$soilK)
  fc3$Current_Yield <- mean(fc3$Current_Yield)
  tt3<- fc3[1,]
  tt3$CY <- tt3$Current_Yield/0.21
  tt3$lat <- mm1$lat
  tt3$lon <- mm1$lon
  tt3$location <- mm1$location
  miss_fc3 <- rbind(miss_fc3, tt3)
  
  fc4 <-  WLY_CY_FCY4_RW_PO[WLY_CY_FCY4_RW_PO$location %in% dd$location, ]
  fc4$soilN <- mean(fc4$soilN)
  fc4$soilP <- mean(fc4$soilP)
  fc4$soilK <- mean(fc4$soilK)
  fc4$Current_Yield <- mean(fc4$Current_Yield)
  tt4<- fc4[1,]
  tt4$CY <- tt4$Current_Yield/0.21
  tt4$lat <- mm1$lat
  tt4$lon <- mm1$lon
  tt4$location <- mm1$location
  miss_fc4 <- rbind(miss_fc4, tt4)
  
  fc5 <-  WLY_CY_FCY5_RW_PO[WLY_CY_FCY5_RW_PO$location %in% dd$location, ]
  fc5$soilN <- mean(fc5$soilN)
  fc5$soilP <- mean(fc5$soilP)
  fc5$soilK <- mean(fc5$soilK)
  fc5$Current_Yield <- mean(fc5$Current_Yield)
  tt5<- fc5[1,]
  tt5$CY <- tt5$Current_Yield/0.21
  tt5$lat <- mm1$lat
  tt5$lon <- mm1$lon
  tt5$location <- mm1$location
  miss_fc5 <- rbind(miss_fc5, tt5)
  
}

Recom_FCY1_miss <- loop_recom(WLY_CY_Data = miss_fc1)

Recom_FCY2_miss <- loop_recom(WLY_CY_Data = miss_fc2)

Recom_FCY3_miss <- loop_recom(WLY_CY_Data = miss_fc3)

Recom_FCY4_miss <- loop_recom(WLY_CY_Data = miss_fc4)

Recom_FCY5_miss <- loop_recom(WLY_CY_Data = miss_fc5)





Recom_FCY1 <- unique(rbind(Recom_FCY1, Recom_FCY1_miss))
saveRDS(Recom_FCY1, "RW_Recom_FCY1_Val_21.RDS")

Recom_FCY2 <- unique(rbind(Recom_FCY2, Recom_FCY2_miss))
saveRDS(Recom_FCY2, "RW_Recom_FCY2_Val_21.RDS")

Recom_FCY3 <- unique(rbind(Recom_FCY3, Recom_FCY3_miss))
saveRDS(Recom_FCY3, "RW_Recom_FCY3_Val_21.RDS")

Recom_FCY4 <- unique(rbind(Recom_FCY4, Recom_FCY4_miss))
saveRDS(Recom_FCY4, "RW_Recom_FCY4_Val_21.RDS")

Recom_FCY5 <- unique(rbind(Recom_FCY5, Recom_FCY5_miss))
saveRDS(Recom_FCY5, "RW_Recom_FCY5_Val_21.RDS")






##############################

# setwd("D:/ACAI_Wrapper/Rwanda/Potato")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
# Recom_FCY1 <- readRDS("RW_Recom_FCY1_B.RDS")
# Recom_FCY2 <- readRDS("RW_Recom_FCY2_B.RDS")
# Recom_FCY3 <- readRDS("RW_Recom_FCY3_B.RDS")
# Recom_FCY4 <- readRDS("RW_Recom_FCY4_B.RDS")
# Recom_FCY5 <- readRDS("RW_Recom_FCY5_B.RDS")
## season 1
Recom_FCY1 <- readRDS("RW_Recom_FCY1_C.RDS")
Recom_FCY2 <- readRDS("RW_Recom_FCY2_C.RDS")
Recom_FCY3 <- readRDS("RW_Recom_FCY3_C.RDS")
Recom_FCY4 <- readRDS("RW_Recom_FCY4_C.RDS")
Recom_FCY5 <- unique(readRDS("RW_Recom_FCY5_C.RDS"))


## season 2 val
Recom_FCY1 <- readRDS("RW_Recom_FCY1_Val_21.RDS")
Recom_FCY2 <- readRDS("RW_Recom_FCY2_Val_21.RDS")
Recom_FCY3 <- readRDS("RW_Recom_FCY3_Val_21.RDS")
Recom_FCY4 <- readRDS("RW_Recom_FCY4_Val_21.RDS")
Recom_FCY5 <- unique(readRDS("RW_Recom_FCY5_Val_21.RDS"))

Recom_FCY1 <- unique(Recom_FCY1)
Recom_FCY2 <- unique(Recom_FCY2)
Recom_FCY3 <- unique(Recom_FCY3)
Recom_FCY4 <- unique(Recom_FCY4)
Recom_FCY5 <- unique(Recom_FCY5)



Recom_FCY1$location <- paste(Recom_FCY1$lat, Recom_FCY1$lon, sep="_")
Recom_FCY2$location <- paste(Recom_FCY2$lat, Recom_FCY2$lon, sep="_")
Recom_FCY3$location <- paste(Recom_FCY3$lat, Recom_FCY3$lon, sep="_")
Recom_FCY4$location <- paste(Recom_FCY4$lat, Recom_FCY4$lon, sep="_")
Recom_FCY5$location <- paste(Recom_FCY5$lat, Recom_FCY5$lon, sep="_")


nrow((Recom_FCY1))
nrow((Recom_FCY2))
nrow((Recom_FCY3))
nrow((Recom_FCY4))
nrow((Recom_FCY5))


ncol((Recom_FCY1))
ncol((Recom_FCY2))
ncol((Recom_FCY3))
ncol((Recom_FCY4))
ncol((Recom_FCY5))

head(Recom_FCY5)

unique(Recom_FCY5$NAME_2)

Recom_FCY1[Recom_FCY1$lat == -1.625 & Recom_FCY1$lon == 29.325, ]


require(ggplot2)
head(Recom_FCY1)
# Recom_FCY1 <- Recom_FCY1[Recom_FCY1$lat!=-1.625 & Recom_FCY1$lon != 29.325, ]
# Recom_FCY2 <- Recom_FCY2[Recom_FCY2$lat!=-1.625 & Recom_FCY2$lon != 29.325, ]
# Recom_FCY3 <- Recom_FCY3[Recom_FCY3$lat!=-1.625 & Recom_FCY3$lon != 29.325, ]
# Recom_FCY4 <- Recom_FCY4[Recom_FCY4$lat!=-1.625 & Recom_FCY4$lon != 29.325, ]
# Recom_FCY5 <- Recom_FCY5[Recom_FCY5$lat!=-1.625 & Recom_FCY5$lon != 29.325, ]

# plots currentY vs target, histogram of targetY - currentY, histogram of netRev and totalCost, netRev vs totalCost.

head(Recom_FCY1)
Recom_FCY1$FCY <- "class1"
Recom_FCY2$FCY <- "class2"
Recom_FCY3$FCY <- "class3"
Recom_FCY4$FCY <- "class4"
Recom_FCY5$FCY <- "class5"

Arifu_FCY1 <- Recom_FCY1
Arifu_FCY2 <- Recom_FCY2
Arifu_FCY3 <- Recom_FCY3
Arifu_FCY4 <- Recom_FCY4
Arifu_FCY5 <- Recom_FCY5


Recom_P <- rbind(Recom_FCY1, Recom_FCY2,Recom_FCY3,Recom_FCY4,Recom_FCY5)
head(Recom_P)
summary(Recom_P$TC)


ggplot(Recom_P, aes(CY/1000, TC)) +
  geom_point()+
  facet_wrap(~FCY)+
  xlab("Current yield (t/ha)")+ ylab("Net revenue") +
  theme_bw()+
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
        legend.position = "none", strip.text = element_text(size=14))



ggcynr <- ggplot(Recom_P, aes(CY/1000, NR)) +
  geom_point()+
  facet_wrap(~FCY)+
  xlab("Current yield (t/ha)")+ ylab("Net revenue") +
  theme_bw()+
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
        legend.position = "none", strip.text = element_text(size=14))

ggsave("CY_NR_C.pdf", ggcynr)

ggcy <- ggplot(Recom_P, aes(CY/1000, TargetY/1000))+
  geom_point() +
  facet_wrap(~FCY) +
  xlab("Current yield (t/ha)")+ ylab("Target yield (t/ha)") +
  geom_abline(slope = 1, intercept = 0) +
  theme_bw() +
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
        legend.position = "none", strip.text = element_text(size=14))

ggsave("CY_TY_C.pdf", ggcy)


ggtcnr <- ggplot(Recom_P[Recom_P$TC > 0, ], aes(TC/1000, NR/1000))+
  geom_point()+
  facet_wrap(~FCY)+ 
  xlab("Total cost") + ylab("Net revenue")+
  theme_bw()+
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
      legend.position = "none", strip.text = element_text(size=14))

ggsave("totalCostnetRevenue_C.pdf", ggtcnr)

 ggnr <- ggplot(Recom_P, aes(NR/1000))+
  geom_histogram()+
  facet_wrap(~FCY)+
  ylab("") + xlab("Net revenue")+
  theme_bw()+
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
      legend.position = "none", strip.text = element_text(size=14))
ggsave("netRevenue_C.pdf", ggnr)



ggtycy <- ggplot(Recom_P, aes(TargetY/1000 - CY/1000))+
  geom_histogram()+
  facet_wrap(~FCY)+
  xlab("Target Yield  -  Current yield (t/ha)")+
  theme_bw()+
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
        legend.position = "none", strip.text = element_text(size=14))
ggsave("targetYieldcurrentYield_C.pdf", ggtycy)


require(plyr)
summaryData <- ddply(Recom_P, .(NAME_1, NAME_2), summarize, currentYield = mean(CY),
      targetYield = mean(TargetY), totalCost = mean(TC), netRevenue = mean(NR))

summaryData[order(summaryData$netRevenue), ]

cy <- ggplot(Recom_P, aes(CY, fill=FCY))+
  geom_histogram()+
  ylab("") + xlab("current yield")+
  theme_bw()+
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
         strip.text = element_text(size=14))

ggsave("currentYieldHIst_C.pdf", cy)

head(Recom_P)

ggplot(Recom_P, aes(Urea, fill=FCY))+
  geom_histogram()+
  facet_wrap(~FCY)+
  ylab("") + xlab("current yield")+
  theme_bw()+
  theme(axis.text = element_text(size=12), axis.title = element_text(size=14),
        strip.text = element_text(size=14))


# setwd("D:/ACAI_Wrapper/Rwanda")
setwd("/home/akilimo/projects/SoilNPK/Rwanda")
library(rgdal)
rwdistrict <- readOGR(dsn=getwd(), layer="rwa_adm2_2006_NISR_WGS1984_20181002")
plot(rwdistrict)
plot(rwdistrict[rwdistrict$ADM2_EN %in% PotatoRegion, ], add=TRUE, col="red")

unique(rwdistrict$ADM2_EN)

# lookup_key      	rateUrea	rateDAP  	rateYaraMila_UNIK	currentY	targetY	netRev	totalCost
# E38.525N-6.375p1y2	76	73	166	19.3	42.4	           1658	       189

PotatoRegion <- c("Nyabihu", "Rubavu", "Gicumbi", "Musanze", "Burera","Rutsiro","Gakenke",
                  "Rulindo","Ngororero", "Karongi")

Recom_FCY1 <- Recom_FCY1[Recom_FCY1$NAME_2 %in% PotatoRegion, ]
plot(Recom_FCY1$lon, Recom_FCY1$lat)
Recom_FCY1$lookup_key <- paste("E", Recom_FCY1$lon, "N", Recom_FCY1$lat, "y1", sep="")
Recom_FCY1$rateUrea <- round(Recom_FCY1$Urea, digits = 0)
Recom_FCY1$rateNPK17_17_17 <- round(Recom_FCY1$NPK17_17_17, digits = 0)
Recom_FCY1$rateDAP <- round(Recom_FCY1$DAP, digits = 0)
Recom_FCY1$currentY <- round(Recom_FCY1$CY/1000, digits = 0)
Recom_FCY1$targetY <- round(Recom_FCY1$TargetY/1000, digits = 0)
Recom_FCY1$netRev2 <- round(Recom_FCY1$NR*0.001, digits = 0)
Recom_FCY1$totalCost <- round(Recom_FCY1$TC*0.001, digits = 0)
Recom_FCY1$netRev <- ((Recom_FCY1$targetY - Recom_FCY1$currentY)*200) - Recom_FCY1$totalCost
Recom_FCY1_pr <- Recom_FCY1[, c("lookup_key","rateUrea","rateNPK17_17_17","rateDAP","currentY","targetY","netRev","totalCost")]

Recom_FCY2 <- Recom_FCY2[Recom_FCY2$NAME_2 %in% PotatoRegion, ]
plot(Recom_FCY2$lon, Recom_FCY2$lat)
Recom_FCY2$lookup_key <- paste("E", Recom_FCY2$lon, "N", Recom_FCY2$lat, "y2", sep="")
Recom_FCY2$rateUrea <- round(Recom_FCY2$Urea, digits = 0)
Recom_FCY2$rateNPK17_17_17 <- round(Recom_FCY2$NPK17_17_17, digits = 0)
Recom_FCY2$rateDAP <- round(Recom_FCY2$DAP, digits = 0)
Recom_FCY2$currentY <- round(Recom_FCY2$CY/1000, digits = 0)
Recom_FCY2$targetY <- round(Recom_FCY2$TargetY/1000, digits = 0)
Recom_FCY2$netRev2 <- round(Recom_FCY2$NR*0.001, digits = 0)
Recom_FCY2$totalCost <- round(Recom_FCY2$TC*0.001, digits = 0)
Recom_FCY2$netRev <- ((Recom_FCY2$targetY - Recom_FCY2$currentY)*200) - Recom_FCY2$totalCost
Recom_FCY2_pr <- Recom_FCY2[, c("lookup_key","rateUrea","rateNPK17_17_17","rateDAP","currentY","targetY","netRev","totalCost")]

Recom_FCY3 <- Recom_FCY3[Recom_FCY3$NAME_2 %in% PotatoRegion, ]
Recom_FCY3$lookup_key <- paste("E", Recom_FCY3$lon, "N", Recom_FCY3$lat, "y3", sep="")
Recom_FCY3$rateUrea <- round(Recom_FCY3$Urea, digits = 0)
Recom_FCY3$rateNPK17_17_17 <- round(Recom_FCY3$NPK17_17_17, digits = 0)
Recom_FCY3$rateDAP <- round(Recom_FCY3$DAP, digits = 0)
Recom_FCY3$currentY <- round(Recom_FCY3$CY/1000, digits = 0)
Recom_FCY3$targetY <- round(Recom_FCY3$TargetY/1000, digits = 0)
Recom_FCY3$netRev2 <- round(Recom_FCY3$NR*0.001, digits = 0)
Recom_FCY3$totalCost <- round(Recom_FCY3$TC*0.001, digits = 0)
Recom_FCY3$netRev <- ((Recom_FCY3$targetY - Recom_FCY3$currentY)*200) - Recom_FCY3$totalCost
Recom_FCY3_pr <- Recom_FCY3[, c("lookup_key","rateUrea","rateNPK17_17_17","rateDAP","currentY","targetY","netRev","totalCost")]


Recom_FCY4 <- Recom_FCY4[Recom_FCY4$NAME_2 %in% PotatoRegion, ]
Recom_FCY4$lookup_key <- paste("E", Recom_FCY4$lon, "N", Recom_FCY4$lat, "y4", sep="")
Recom_FCY4$rateUrea <- round(Recom_FCY4$Urea, digits = 0)
Recom_FCY4$rateNPK17_17_17 <- round(Recom_FCY4$NPK17_17_17, digits = 0)
Recom_FCY4$rateDAP <- round(Recom_FCY4$DAP, digits = 0)
Recom_FCY4$currentY <- round(Recom_FCY4$CY/1000, digits = 0)
Recom_FCY4$targetY <- round(Recom_FCY4$TargetY/1000, digits = 0)
Recom_FCY4$netRev2 <- round(Recom_FCY4$NR*0.001, digits = 0)
Recom_FCY4$totalCost <- round(Recom_FCY4$TC*0.001, digits = 0)
Recom_FCY4$netRev <- ((Recom_FCY4$targetY - Recom_FCY4$currentY)*200) - Recom_FCY4$totalCost
Recom_FCY4_pr <- Recom_FCY4[, c("lookup_key","rateUrea","rateNPK17_17_17","rateDAP","currentY","targetY","netRev","totalCost")]


Recom_FCY5 <- Recom_FCY5[Recom_FCY5$NAME_2 %in% PotatoRegion, ]
Recom_FCY5$lookup_key <- paste("E", Recom_FCY5$lon, "N", Recom_FCY5$lat, "y5", sep="")
Recom_FCY5$rateUrea <- round(Recom_FCY5$Urea, digits = 0)
Recom_FCY5$rateNPK17_17_17 <- round(Recom_FCY5$NPK17_17_17, digits = 0)
Recom_FCY5$rateDAP <- round(Recom_FCY5$DAP, digits = 0)
Recom_FCY5$currentY <- round(Recom_FCY5$CY/1000, digits = 0)
Recom_FCY5$targetY <- round(Recom_FCY5$TargetY/1000, digits = 0)
Recom_FCY5$netRev2 <- round(Recom_FCY5$NR*0.001, digits = 0)
Recom_FCY5$totalCost <- round(Recom_FCY5$TC*0.001, digits = 0)
Recom_FCY5$netRev <- ((Recom_FCY5$targetY - Recom_FCY5$currentY)*200) - Recom_FCY5$totalCost
Recom_FCY5_pr <- Recom_FCY5[, c("lookup_key","rateUrea","rateNPK17_17_17","rateDAP","currentY","targetY","netRev","totalCost")]


Recom_FCY_RW_Potato <- rbind(Recom_FCY1_pr, Recom_FCY2_pr, Recom_FCY3_pr, Recom_FCY4_pr, Recom_FCY5_pr)
write.csv(Recom_FCY_RW_Potato, "Recom_FCY_RW_PotatoRegion_v4.csv", row.names = FALSE)


write.csv(Recom_FCY_RW_Potato, "/home/akilimo/projects/SoilNPK/Rwanda/Recom_FCY_RW_213USD.csv", row.names = FALSE)

nrow(Recom_FCY_RW_Potato)


# setwd("D:/ACAI_Wrapper/Rwanda/Potato")
setwd("/home/akilimo/projects/SoilNPK/Rwanda/Potato")
Recom_FCY_RW_Potato <- read.csv("/home/akilimo/projects/SoilNPK/Rwanda/Recom_FCY_RW_213USD.csv")

head(Recom_FCY_RW_Potato)
Recom_FCY_RW_Potato[Recom_FCY_RW_Potato$rateDAP == 0 &
                      Recom_FCY_RW_Potato$rateNPK17_17_17 == 0 & 
                      Recom_FCY_RW_Potato$rateUrea == 0, ]


#### Arifu data

Arifu_potato <- function(fcydata, FCY){
  fcydata$YieldInc <- round((fcydata$TargetY - fcydata$CY)/1000, digits = 2)
  
  FCYArifuPotato <- ddply(fcydata, .(NAME_1, NAME_2), summarise, rateUrea = mean(Urea), rateNPK17_17_17 = mean(NPK17_17_17),
                           rateDAP = mean(DAP), DY = mean(YieldInc))
  FCYArifuPotato <- merge(FCYArifuPotato, data.frame(month = c(1:12)))
  FCYArifuPotato$FCY <- FCY
  FCYArifuPotato <- FCYArifuPotato[, c("NAME_1", "NAME_2","month","rateUrea","rateNPK17_17_17","rateDAP","DY","FCY")]
  colnames(FCYArifuPotato) <- c("HASC1","HASC2","month","rateUrea","rateNPK171717","rateDAP","DY","FCY")
  return(FCYArifuPotato)
}


FCY1arifu <- Arifu_potato(Arifu_FCY1, FCY=1)
FCY2arifu <- Arifu_potato(Arifu_FCY2, FCY=2)
FCY3arifu <- Arifu_potato(Arifu_FCY3, FCY=3)
FCY4arifu <- Arifu_potato(Arifu_FCY4, FCY=4)
FCY5arifu <- Arifu_potato(Arifu_FCY5, FCY=5)


ArifuPotato <- rbind(FCY1arifu, FCY2arifu, FCY3arifu, FCY4arifu, FCY5arifu)
str(ArifuPotato)

write.csv(ArifuPotato, 'ArifuPotato.csv', row.names = FALSE)


install.packages("RCurl", dependencies = TRUE)
library("RCurl")
scp("157.245.26.55", "/home/akilimo/projects/AKILIMO_Modeling/DefineCoordinates/output/Burundi_Coordinates.csv", 
    keypasswd = "akilimo_101!", user="akilimo") 


















