% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dfr_kldist.R
\name{dfr_kldist}
\alias{dfr_kldist}
\title{KL Distance method}
\usage{
dfr_kldist(target_feat, window_size = 100, p_th = 0.9, data = NULL)
}
\arguments{
\item{target_feat}{Feature to be monitored.}

\item{window_size}{Size of the sliding window (must be > 2*stat_size)}

\item{p_th}{Probability threshold for the test statistic of the Kullback-Leibler distance.}

\item{data}{Already collected data to avoid cold start.}
}
\value{
\code{dfr_kldist} object
}
\description{
Kullback-Leibler (KL) distance windowing method for concept drift detection.
}
\examples{
# Example: weekly retraining of a classifier while dfr_kldist monitors one
# feature for concept drift. The F1 score per batch is plotted and batches
# flagged as drifted are marked with dotted red lines.
require("daltoolbox")
require('ggplot2')
require('caret')

data("st_real_examples")

# Keep the flights departing from SBSP and label each row with the ISO 8601
# week number of its expected departure; each week is one batch.
bfd <- st_real_examples$bfd1
bfd["batch_index"] <- format(bfd["expected_depart"], "\%V")
bfd <- bfd[bfd[["depart"]] == "SBSP", ]

# Model features
features <- c(
  "depart_elevation", "depart_visibility", "depart_day_period",
  "depart_pressure", "depart_relative_humidity", "depart_dew_point",
  "depart_wind_speed_scale"
)

# Target: TRUE when delay_depart is positive; rows without a target are dropped
bfd$delay_depart_bin <- bfd$delay_depart > 0
target <- "delay_depart_bin"
bfd <- bfd[complete.cases(bfd[target]), ]
slevels <- c(TRUE, FALSE)

# Cut-off applied to the predicted score to obtain a binary prediction
th <- 0.5

ordered_batches <- sort(unique(bfd$batch_index))
old_start_batch <- ordered_batches[1]

# Classifier wrapped together with the KL distance drift detector
model <- stealthy(
  daltoolbox::cla_nb(target, slevels),
  dfr_kldist(target_feat = "depart_visibility", p_th = 0.2)
)

# The first batch is only ever used for training, so evaluation starts at the
# second one. Negative indexing avoids the 2:length(x) pitfall, which counts
# backwards when there is a single batch.
test_batches <- ordered_batches[-1]

# One result row per evaluated batch, collected in a preallocated list
rows <- vector("list", length(test_batches))

for (i in seq_along(test_batches)) {
  batch <- test_batches[i]
  print(batch)
  print(old_start_batch)

  # Test on the current batch, train on the batches since the previous start
  new_batch <- bfd[bfd$batch_index == batch, ]
  last_batch <- bfd[
    (bfd$batch_index < batch) & (bfd$batch_index >= old_start_batch),
  ]

  old_start_batch <- batch

  x_train <- last_batch[, features]
  y_train <- last_batch[, target, drop = FALSE]

  x_test <- new_batch[, features]
  y_test <- new_batch[, target]

  model <- fit(model, x_train, y_train)

  test_predictions <- predict(model, x_test)
  y_pred <- test_predictions[, 2] > th

  # Evaluation
  precision <- evaluate(mt_precision(), y_pred, y_test)
  recall <- evaluate(mt_recall(), y_pred, y_test)
  f1 <- evaluate(mt_fscore(), y_pred, y_test)

  rows[[i]] <- c(batch, precision, recall, f1, model$drifted)

  print(nrow(model$x_train))
  print(nrow(new_batch))
}

# Mixing the batch label with the metrics makes every column character, so
# the columns are named first and converted only where they are used.
results <- as.data.frame(do.call(rbind, rows))
names(results) <- c("index", "precision", "recall", "f1", "drift")

# The batch label is a week number, not a date, so it is kept as a discrete
# x value instead of being passed through as.Date().
drift_batches <- results$index[results$drift == "TRUE"]

ggplot(data = results, aes(x = index, y = as.numeric(f1), group = 1)) +
  geom_line() +
  xlab("") +
  ylab("F1") +
  geom_vline(
    xintercept = drift_batches,
    linetype = "dotted",
    color = "red",
    linewidth = 0.5
  ) +
  theme_classic()

}
