## 2026-03-03 The purpose of this script is to generate simulated datasets and
## run the clustord algorithm with various different models and save the outputs
## of the algorithm. The saved results will then be used as fixed output objects
## to test the reorder() function in its unit tests.
## The original reorder() unit tests performed the dataset generation and the
## clustord fitting themselves, but this created discrepancies between the
## clustord results on MacOS vs. Windows/Linux.
## Note that the clustord runs in this script are not intended to run until
## convergence, because that is not needed for testing the application of reorder()
## to the results. So it is expected behaviour that every run here will result
## in a warning about the algorithm failing to converge.
## BE AWARE: if you change ANY OF THIS CODE then you may need to also change the
## code inside the scripts test_reorder_row.R, test_reorder_col.R and
## test_reorder_bi.R, each of which checks the reordered output against the
## manually ordered objects in the files generated by this script.

# Row clustering tests ------
# Constraint sum to zero -----
# Ordinal dataset simulation ----
## Fix the RNG state; every random draw below happens in a fixed order so the
## simulated ordinal dataset is reproducible.
set.seed(30)
n <- 30
p <- 5

## Long-format data: one record per (ROW, COL) cell with ROW cycling fastest,
## and a response Y drawn from the categories 1, 2, 3.
long_df_sim <- data.frame(
  Y = factor(sample(1:3, n * p, replace = TRUE)),
  expand.grid(ROW = seq_len(n), COL = seq_len(p), KEEP.OUT.ATTRS = FALSE)
)

## Make sure to test continuous and categorical covariates
## Row covariates: numeric (xr1), character (xr2) and factor (xr3).
xr1 <- runif(n, min = 0, max = 2)
xr2 <- sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.3, 0.7))
xr3 <- factor(sample(1:4, size = n, replace = TRUE))

## Column covariate: numeric.
xc1 <- runif(p, min = -1, max = 1)

## Expand the covariates to long form: row covariates repeat once per column
## (same pattern as ROW), the column covariate is held for n consecutive
## records (same pattern as COL).
long_df_sim[c("xr1", "xr2", "xr3", "xc1")] <- list(
  rep(xr1, times = p),
  rep(xr2, times = p),
  rep(xr3, times = p),
  rep(xc1, each = n)
)

# OSM results --------------------------------------------------------------
## All fits below use 3 row clusters, a single start and only a few EM
## iterations; the resulting non-convergence warnings are expected.

## Model 1 ----
## NOTE! keep_all_params = TRUE is needed so that there is actually some output
## to reorder in EMstatus$params_every_iteration
orig_standard_OSM1 <- clustord(
  Y ~ ROWCLUST * xr1 + xr2 * xr3 + COL,
  model = "OSM", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_standard_OSM2 <- clustord(
  Y ~ ROWCLUST * COL + xc1,
  model = "OSM", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_standard_OSM3 <- clustord(
  Y ~ ROWCLUST * xc1,
  model = "OSM", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# POM results --------------------------------------------------------------
## Same three formulas as for OSM, fitted with model = "POM".

## Model 1 ----
orig_standard_POM1 <- clustord(
  Y ~ ROWCLUST * xr1 + xr2 * xr3 + COL,
  model = "POM", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_standard_POM2 <- clustord(
  Y ~ ROWCLUST * COL + xc1,
  model = "POM", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_standard_POM3 <- clustord(
  Y ~ ROWCLUST * xc1,
  model = "POM", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)


# Binary dataset simulation ----
# Binary results ----------------------------------------------------------
## Declare the dimensions before the data frame that is built from them.
n <- 30
p <- 5

## Binary response (categories 1 and 2) on the same ROW x COL layout as the
## ordinal dataset.
## NOTE(review): the other binary simulations visible in this script each call
## set.seed() first; this one does not, so the responses drawn here depend on
## the RNG state left by the code above. No seed is added here because doing so
## would change the saved fixture.
long_df_sim <- data.frame(
  Y = factor(sample(1:2, n * p, replace = TRUE)),
  ROW = rep(1:n, times = p),
  COL = rep(1:p, each = n)
)

## Make sure to test continuous and categorical covariates
## The covariate vectors xr1, xr2, xr3 and xc1 already in the workspace are
## reused rather than redrawn.
long_df_sim$xr1 <- rep(xr1, times = p)
long_df_sim$xr2 <- rep(xr2, times = p)
long_df_sim$xr3 <- rep(xr3, times = p)
long_df_sim$xc1 <- rep(xc1, each = n)

## Binary fits with 3 row clusters and the sum-to-zero constraint; the same
## three formulas as for the ordinal models.

## Model 1 ----
orig_standard_Binary1 <- clustord(
  Y ~ ROWCLUST * xr1 + xr2 * xr3 + COL,
  model = "Binary", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_standard_Binary2 <- clustord(
  Y ~ ROWCLUST * COL + xc1,
  model = "Binary", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_standard_Binary3 <- clustord(
  Y ~ ROWCLUST * xc1,
  model = "Binary", RG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# Constraint first element zero -----
# Ordinal dataset simulation ----
## Re-seed and repeat the same sequence of draws used for the sum-to-zero
## row-clustering ordinal dataset.
set.seed(30)
n <- 30
p <- 5

## Long-format data: one record per (ROW, COL) cell with ROW cycling fastest,
## and a response Y drawn from the categories 1, 2, 3.
long_df_sim <- data.frame(
  Y = factor(sample(1:3, n * p, replace = TRUE)),
  expand.grid(ROW = seq_len(n), COL = seq_len(p), KEEP.OUT.ATTRS = FALSE)
)

## Make sure to test continuous and categorical covariates
## Row covariates: numeric (xr1), character (xr2) and factor (xr3).
xr1 <- runif(n, min = 0, max = 2)
xr2 <- sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.3, 0.7))
xr3 <- factor(sample(1:4, size = n, replace = TRUE))

## Column covariate: numeric.
xc1 <- runif(p, min = -1, max = 1)

## Expand the covariates to long form: row covariates follow the ROW pattern,
## the column covariate follows the COL pattern.
long_df_sim[c("xr1", "xr2", "xr3", "xc1")] <- list(
  rep(xr1, times = p),
  rep(xr2, times = p),
  rep(xr3, times = p),
  rep(xc1, each = n)
)

## NOTE: RG = 4 is used here (RG = 3 was used above).
## With RG = 3 and the first cluster effect fixed at 0, the non-zero cluster
## effects have only 2 possible orderings, so either the increasing or the
## decreasing order always coincides with the original model ordering.
## Using 4 clusters raises the chance that both directions differ from the
## original ordering.

# OSM results --------------------------------------------------------------
## Model 1 ----
orig_first_elt_OSM1 <- clustord(
  Y ~ ROWCLUST * xr1 + xr2 * xr3 + COL,
  model = "OSM", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_first_elt_OSM2 <- clustord(
  Y ~ ROWCLUST * COL + xc1,
  model = "OSM", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_first_elt_OSM3 <- clustord(
  Y ~ ROWCLUST * xc1,
  model = "OSM", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# POM results --------------------------------------------------------------
## Model 1 ----
orig_first_elt_POM1 <- clustord(
  Y ~ ROWCLUST * xr1 + xr2 * xr3 + COL,
  model = "POM", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_first_elt_POM2 <- clustord(
  Y ~ ROWCLUST * COL + xc1,
  model = "POM", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_first_elt_POM3 <- clustord(
  Y ~ ROWCLUST * xc1,
  model = "POM", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# Binary dataset simulation ----
# Binary results ----------------------------------------------------------
set.seed(1)

## Declare the dimensions before the data frame that is built from them.
n <- 30
p <- 5

## Binary response (categories 1 and 2) on the same ROW x COL layout as the
## ordinal dataset.
long_df_sim <- data.frame(
  Y = factor(sample(1:2, n * p, replace = TRUE)),
  ROW = rep(1:n, times = p),
  COL = rep(1:p, each = n)
)

## Make sure to test continuous and categorical covariates
## The covariate vectors xr1, xr2, xr3 and xc1 already in the workspace are
## reused rather than redrawn.
long_df_sim$xr1 <- rep(xr1, times = p)
long_df_sim$xr2 <- rep(xr2, times = p)
long_df_sim$xr3 <- rep(xr3, times = p)
long_df_sim$xc1 <- rep(xc1, each = n)

## Binary fits with 4 row clusters and constraint_sum_zero = FALSE; the same
## three formulas as for the ordinal models.

## Model 1 ----
orig_first_elt_Binary1 <- clustord(
  Y ~ ROWCLUST * xr1 + xr2 * xr3 + COL,
  model = "Binary", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_first_elt_Binary2 <- clustord(
  Y ~ ROWCLUST * COL + xc1,
  model = "Binary", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_first_elt_Binary3 <- clustord(
  Y ~ ROWCLUST * xc1,
  model = "Binary", RG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Store all eighteen row-clustering fits in one fixture file. The path is
## relative, so the script has to be run from a directory in which
## tests/testthat/ exists.
save(
  list = c(
    "orig_standard_OSM1", "orig_standard_OSM2", "orig_standard_OSM3",
    "orig_standard_POM1", "orig_standard_POM2", "orig_standard_POM3",
    "orig_standard_Binary1", "orig_standard_Binary2", "orig_standard_Binary3",
    "orig_first_elt_OSM1", "orig_first_elt_OSM2", "orig_first_elt_OSM3",
    "orig_first_elt_POM1", "orig_first_elt_POM2", "orig_first_elt_POM3",
    "orig_first_elt_Binary1", "orig_first_elt_Binary2",
    "orig_first_elt_Binary3"
  ),
  file = "tests/testthat/reorder_models_row.Rdata"
)


# Column clustering tests ------
# Constraint sum to zero -----
# Ordinal dataset simulation ----
## Fix the RNG state; every random draw below happens in a fixed order so the
## simulated ordinal dataset is reproducible.
set.seed(30)
n <- 5
p <- 30

## Long-format data: one record per (ROW, COL) cell with ROW cycling fastest,
## and a response Y drawn from the categories 1, 2, 3.
long_df_sim <- data.frame(
  Y = factor(sample(1:3, n * p, replace = TRUE)),
  expand.grid(ROW = seq_len(n), COL = seq_len(p), KEEP.OUT.ATTRS = FALSE)
)

## Make sure to test continuous and categorical covariates
## Column covariates: numeric (xc1), character (xc2) and integer (xc3; not
## converted to a factor here).
xc1 <- runif(p, min = 0, max = 2)
xc2 <- sample(c("A", "B"), size = p, replace = TRUE, prob = c(0.3, 0.7))
xc3 <- sample(1:4, size = p, replace = TRUE)

## Row covariate: numeric.
xr1 <- runif(n, min = -1, max = 1)

## NOTE(review): COL is rep(1:p, each = n) and ROW is rep(1:n, times = p), so
## a covariate constant within each column would be rep(xc1, each = n) and one
## constant within each row would be rep(xr1, times = p). The times = 5 /
## each = 30 expansion below does not line up with COL / ROW in that way. It is
## reproduced unchanged because altering it would change the saved fixtures
## the reorder() tests rely on -- confirm whether the alignment matters.
long_df_sim[c("xc1", "xc2", "xc3", "xr1")] <- list(
  rep(xc1, times = 5),
  rep(xc2, times = 5),
  rep(xc3, times = 5),
  rep(xr1, each = 30)
)

# OSM results --------------------------------------------------------------
## All fits below use 3 column clusters, a single start and only a few EM
## iterations; the resulting non-convergence warnings are expected.

## Model 1 ----
## NOTE! keep_all_params = TRUE is needed so that there is actually some output
## to reorder in EMstatus$params_every_iteration
orig_standard_OSM1 <- clustord(
  Y ~ COLCLUST * xc1 + xc2 * xc3 + ROW,
  model = "OSM", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_standard_OSM2 <- clustord(
  Y ~ COLCLUST * ROW + xr1,
  model = "OSM", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_standard_OSM3 <- clustord(
  Y ~ COLCLUST * xr1,
  model = "OSM", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# POM results --------------------------------------------------------------
## Same three formulas as for OSM, fitted with model = "POM".

## Model 1 ----
orig_standard_POM1 <- clustord(
  Y ~ COLCLUST * xc1 + xc2 * xc3 + ROW,
  model = "POM", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_standard_POM2 <- clustord(
  Y ~ COLCLUST * ROW + xr1,
  model = "POM", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_standard_POM3 <- clustord(
  Y ~ COLCLUST * xr1,
  model = "POM", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# Binary dataset simulation ----
# Binary results ----------------------------------------------------------
set.seed(50)

## Column-clustering dimensions (5 rows, 30 columns), declared before the data
## frame that is built from them. These are the values the data frame was
## already being built with; n and p are no longer switched to 30 and 5
## afterwards, which contradicted the dataset.
n <- 5
p <- 30

## Binary response (categories 1 and 2), one record per (ROW, COL) cell with
## ROW cycling fastest.
long_df_sim <- data.frame(
  Y = factor(sample(1:2, n * p, replace = TRUE)),
  ROW = rep(1:n, times = p),
  COL = rep(1:p, each = n)
)

## Make sure to test continuous and categorical covariates
## The covariate vectors xc1, xc2, xc3 and xr1 already in the workspace are
## reused rather than redrawn.
## NOTE(review): COL is rep(1:p, each = n) and ROW is rep(1:n, times = p), so
## a covariate constant within each column would be rep(xc1, each = n) and one
## constant within each row would be rep(xr1, times = p). The times = 5 /
## each = 30 expansion below does not line up with COL / ROW in that way. It is
## kept as-is because altering it would change the saved fixtures -- confirm
## whether the alignment matters.
long_df_sim$xc1 <- rep(xc1, times = 5)
long_df_sim$xc2 <- rep(xc2, times = 5)
long_df_sim$xc3 <- rep(xc3, times = 5)
long_df_sim$xr1 <- rep(xr1, each = 30)

## Binary fits with 3 column clusters and the sum-to-zero constraint; the same
## three formulas as for the ordinal models.

## Model 1 ----
orig_standard_Binary1 <- clustord(
  Y ~ COLCLUST * xc1 + xc2 * xc3 + ROW,
  model = "Binary", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_standard_Binary2 <- clustord(
  Y ~ COLCLUST * ROW + xr1,
  model = "Binary", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_standard_Binary3 <- clustord(
  Y ~ COLCLUST * xr1,
  model = "Binary", CG = 3, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = TRUE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# Constraint first element zero ----
# Ordinal dataset simulation ----
## Re-seed and repeat the same sequence of draws used for the sum-to-zero
## column-clustering ordinal dataset.
set.seed(30)
n <- 5
p <- 30

## Long-format data: one record per (ROW, COL) cell with ROW cycling fastest,
## and a response Y drawn from the categories 1, 2, 3.
long_df_sim <- data.frame(
  Y = factor(sample(1:3, n * p, replace = TRUE)),
  expand.grid(ROW = seq_len(n), COL = seq_len(p), KEEP.OUT.ATTRS = FALSE)
)

## Make sure to test continuous and categorical covariates
## Column covariates: numeric (xc1), character (xc2) and integer (xc3; not
## converted to a factor here).
xc1 <- runif(p, min = 0, max = 2)
xc2 <- sample(c("A", "B"), size = p, replace = TRUE, prob = c(0.3, 0.7))
xc3 <- sample(1:4, size = p, replace = TRUE)

## Row covariate: numeric.
xr1 <- runif(n, min = -1, max = 1)

## NOTE(review): COL is rep(1:p, each = n) and ROW is rep(1:n, times = p), so
## a covariate constant within each column would be rep(xc1, each = n) and one
## constant within each row would be rep(xr1, times = p). The times = 5 /
## each = 30 expansion below does not line up with COL / ROW in that way. It is
## reproduced unchanged because altering it would change the saved fixtures
## the reorder() tests rely on -- confirm whether the alignment matters.
long_df_sim[c("xc1", "xc2", "xc3", "xr1")] <- list(
  rep(xc1, times = 5),
  rep(xc2, times = 5),
  rep(xc3, times = 5),
  rep(xr1, each = 30)
)

## NOTE: CG = 4 is used here (CG = 3 was used above).
## With CG = 3 and the first cluster effect fixed at 0, the non-zero cluster
## effects have only 2 possible orderings, so either the increasing or the
## decreasing order always coincides with the original model ordering.
## Using 4 clusters raises the chance that both directions differ from the
## original ordering.

# OSM results --------------------------------------------------------------
## Model 1 ----
orig_first_elt_OSM1 <- clustord(
  Y ~ COLCLUST * xc1 + xc2 * xc3 + ROW,
  model = "OSM", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_first_elt_OSM2 <- clustord(
  Y ~ COLCLUST * ROW + xr1,
  model = "OSM", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_first_elt_OSM3 <- clustord(
  Y ~ COLCLUST * xr1,
  model = "OSM", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# POM results --------------------------------------------------------------
## Model 1 ----
orig_first_elt_POM1 <- clustord(
  Y ~ COLCLUST * xc1 + xc2 * xc3 + ROW,
  model = "POM", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_first_elt_POM2 <- clustord(
  Y ~ COLCLUST * ROW + xr1,
  model = "POM", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_first_elt_POM3 <- clustord(
  Y ~ COLCLUST * xr1,
  model = "POM", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# Binary dataset simulation ----
set.seed(1)

## Column-clustering dimensions (5 rows, 30 columns), declared before the data
## frame that is built from them. These are the values the data frame was
## already being built with; n and p are no longer switched to 30 and 5
## afterwards, which contradicted the dataset.
n <- 5
p <- 30

## Binary response (categories 1 and 2), one record per (ROW, COL) cell with
## ROW cycling fastest.
long_df_sim <- data.frame(
  Y = factor(sample(1:2, n * p, replace = TRUE)),
  ROW = rep(1:n, times = p),
  COL = rep(1:p, each = n)
)

## Make sure to test continuous and categorical covariates
## The covariate vectors xc1, xc2, xc3 and xr1 already in the workspace are
## reused rather than redrawn.
## NOTE(review): COL is rep(1:p, each = n) and ROW is rep(1:n, times = p), so
## a covariate constant within each column would be rep(xc1, each = n) and one
## constant within each row would be rep(xr1, times = p). The times = 5 /
## each = 30 expansion below does not line up with COL / ROW in that way. It is
## kept as-is because altering it would change the saved fixtures -- confirm
## whether the alignment matters.
long_df_sim$xc1 <- rep(xc1, times = 5)
long_df_sim$xc2 <- rep(xc2, times = 5)
long_df_sim$xc3 <- rep(xc3, times = 5)
long_df_sim$xr1 <- rep(xr1, each = 30)

# Binary results ----------------------------------------------------------
## Binary fits with 4 column clusters and constraint_sum_zero = FALSE; the
## same three formulas as for the ordinal models.

## Model 1 ----
orig_first_elt_Binary1 <- clustord(
  Y ~ COLCLUST * xc1 + xc2 * xc3 + ROW,
  model = "Binary", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 2 ----
orig_first_elt_Binary2 <- clustord(
  Y ~ COLCLUST * ROW + xr1,
  model = "Binary", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Model 3 ----
orig_first_elt_Binary3 <- clustord(
  Y ~ COLCLUST * xr1,
  model = "Binary", CG = 4, long_df = long_df_sim, nstarts = 1,
  constraint_sum_zero = FALSE,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Store all eighteen column-clustering fits in one fixture file. The object
## names are the same as in the row-clustering section, so by this point they
## refer to the column-clustering fits assigned above. The path is relative,
## so the script has to be run from a directory in which tests/testthat/
## exists.
save(
  list = c(
    "orig_standard_OSM1", "orig_standard_OSM2", "orig_standard_OSM3",
    "orig_standard_POM1", "orig_standard_POM2", "orig_standard_POM3",
    "orig_standard_Binary1", "orig_standard_Binary2", "orig_standard_Binary3",
    "orig_first_elt_OSM1", "orig_first_elt_OSM2", "orig_first_elt_OSM3",
    "orig_first_elt_POM1", "orig_first_elt_POM2", "orig_first_elt_POM3",
    "orig_first_elt_Binary1", "orig_first_elt_Binary2",
    "orig_first_elt_Binary3"
  ),
  file = "tests/testthat/reorder_models_col.Rdata"
)

# Biclustering tests ----
# Constraint sum to zero ----
# Ordinal dataset simulation ----

## Check that reorder() produces correctly reordered results
## Fix the RNG state; every random draw below happens in a fixed order so the
## simulated ordinal dataset is reproducible.
set.seed(30)
n <- 30
p <- 30

## Long-format data: one record per (ROW, COL) cell with ROW cycling fastest,
## and a response Y drawn from the categories 1, 2, 3.
long_df_sim <- data.frame(
  Y = factor(sample(1:3, n * p, replace = TRUE)),
  expand.grid(ROW = seq_len(n), COL = seq_len(p), KEEP.OUT.ATTRS = FALSE)
)

## Make sure to test continuous and categorical covariates
## Row covariates: numeric (xr1) and character (xr2).
xr1 <- runif(n, min = 0, max = 2)
xr2 <- sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.3, 0.7))

## Column covariates: factor (xc1) and numeric (xc2).
xc1 <- factor(sample(1:4, size = p, replace = TRUE))
xc2 <- runif(p, min = -1, max = 1)

## NOTE(review): ROW is rep(1:n, times = p) and COL is rep(1:p, each = n), but
## the xr* covariates are expanded with each = p and the xc* covariates with
## times = n, i.e. the opposite pattern, so xr* are constant within a COL
## value and xc* within a ROW value. Reproduced unchanged because altering it
## would change the saved fixtures -- confirm whether the alignment matters.
long_df_sim[c("xr1", "xr2", "xc1", "xc2")] <- list(
  rep(xr1, each = p),
  rep(xr2, each = p),
  rep(xc1, times = n),
  rep(xc2, times = n)
)

# OSM results --------------------------------------------------------------
## Biclustering fits with 3 row and 3 column clusters, a single start and only
## a few EM iterations; keep_all_params = TRUE as in the other sections.
## NOTE(review): under standard R formula algebra the term xr1^2 is equivalent
## to xr1; if a quadratic effect is intended it would normally be written
## I(xr1^2). Confirm how clustord interprets this term before changing it.

## Model 1 ----
orig_standard_OSM1 <- clustord(
  Y ~ ROWCLUST * COLCLUST + ROWCLUST:(xr1 + xc1) + COLCLUST:(xr2 + xc2) +
    xr1^2,
  model = "OSM", RG = 3, CG = 3, nstarts = 1,
  constraint_sum_zero = TRUE, long_df = long_df_sim,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)


# POM results --------------------------------------------------------------
## Model 1 ----
orig_standard_POM1 <- clustord(
  Y ~ ROWCLUST * COLCLUST + ROWCLUST:(xr1 + xc1) + COLCLUST:(xr2 + xc2) +
    xr1^2,
  model = "POM", RG = 3, CG = 3, nstarts = 1,
  constraint_sum_zero = TRUE, long_df = long_df_sim,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# Constraint first element zero ----
# Ordinal dataset simulation ----
## Re-seed and repeat the same sequence of draws used for the sum-to-zero
## biclustering ordinal dataset.
set.seed(30)
n <- 30
p <- 30

## Long-format data: one record per (ROW, COL) cell with ROW cycling fastest,
## and a response Y drawn from the categories 1, 2, 3.
long_df_sim <- data.frame(
  Y = factor(sample(1:3, n * p, replace = TRUE)),
  expand.grid(ROW = seq_len(n), COL = seq_len(p), KEEP.OUT.ATTRS = FALSE)
)

## Make sure to test continuous and categorical covariates
## Row covariates: numeric (xr1) and character (xr2).
xr1 <- runif(n, min = 0, max = 2)
xr2 <- sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.3, 0.7))

## Column covariates: factor (xc1) and numeric (xc2).
xc1 <- factor(sample(1:4, size = p, replace = TRUE))
xc2 <- runif(p, min = -1, max = 1)

## NOTE(review): ROW is rep(1:n, times = p) and COL is rep(1:p, each = n), but
## the xr* covariates are expanded with each = p and the xc* covariates with
## times = n, i.e. the opposite pattern, so xr* are constant within a COL
## value and xc* within a ROW value. Reproduced unchanged because altering it
## would change the saved fixtures -- confirm whether the alignment matters.
long_df_sim[c("xr1", "xr2", "xc1", "xc2")] <- list(
  rep(xr1, each = p),
  rep(xr2, each = p),
  rep(xc1, times = n),
  rep(xc2, times = n)
)

## NOTE: RG = 4 and CG = 4 are used here (RG = 3, CG = 3 were used above).
## With 3 clusters and the first cluster effect fixed at 0, the non-zero
## cluster effects have only 2 possible orderings, so either the increasing or
## the decreasing order always coincides with the original model ordering.
## Using 4 clusters raises the chance that both directions differ from the
## original ordering.
## NOTE(review): under standard R formula algebra the term xr1^2 is equivalent
## to xr1; if a quadratic effect is intended it would normally be written
## I(xr1^2). Confirm how clustord interprets this term before changing it.

# OSM results --------------------------------------------------------------
## Model 1 ----
orig_first_elt_OSM1 <- clustord(
  Y ~ ROWCLUST * COLCLUST + ROWCLUST:(xr1 + xc1) + COLCLUST:(xr2 + xc2) +
    xr1^2,
  model = "OSM", RG = 4, CG = 4, nstarts = 1,
  constraint_sum_zero = FALSE, long_df = long_df_sim,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

# POM results --------------------------------------------------------------
## Model 1 ----
orig_first_elt_POM1 <- clustord(
  Y ~ ROWCLUST * COLCLUST + ROWCLUST:(xr1 + xc1) + COLCLUST:(xr2 + xc2) +
    xr1^2,
  model = "POM", RG = 4, CG = 4, nstarts = 1,
  constraint_sum_zero = FALSE, long_df = long_df_sim,
  control_EM = list(maxiter = 3, maxiter_start = 2, keep_all_params = TRUE)
)

## Store the four biclustering fits in one fixture file. The path is relative,
## so the script has to be run from a directory in which tests/testthat/
## exists.
save(
  list = c(
    "orig_standard_OSM1", "orig_standard_POM1",
    "orig_first_elt_OSM1", "orig_first_elt_POM1"
  ),
  file = "tests/testthat/reorder_models_bi.Rdata"
)