The explore package offers a simplified way to use popular data sets or to create synthetic data for experimenting/teaching/training.
This data set comes with the palmerpenguins package. It contains observations of penguins in the Palmer Archipelago: species, island, size measurements (flipper length, body mass, bill dimensions), sex, and year.
library(dplyr)
library(explore)
# Penguins data with the default (long) column names, e.g. bill_length_mm.
data <- use_data_penguins()
data %>% glimpse()
#> Rows: 344
#> Columns: 8
#> $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel~
#> $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse~
#> $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, ~
#> $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, ~
#> $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186~
#> $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, ~
#> $ sex <fct> male, female, female, NA, female, male, female, male~
#> $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007~
# Same penguins data, requested with shortened column names (e.g. bill_len).
data <- use_data_penguins(short_names = TRUE)
data %>% glimpse()
#> Rows: 344
#> Columns: 8
#> $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Ad~
#> $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgersen, Tor~
#> $ bill_len <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, 42.0, ~
#> $ bill_dep <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, 20.2, ~
#> $ flipper_len <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186, 180,~
#> $ body_mass <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, 4250, ~
#> $ sex <fct> male, female, female, NA, female, male, female, male, NA, ~
#> $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007~
This data set comes with the dplyr package. It contains data on 87 Star Wars characters.
# Star Wars characters: one row per character, including list columns.
data <- use_data_starwars()
data %>% glimpse()
#> Rows: 87
#> Columns: 14
#> $ name <chr> "Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Or~
#> $ height <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188, 180, 2~
#> $ mass <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.0, 77.~
#> $ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "brown", N~
#> $ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "light", "~
#> $ eye_color <chr> "blue", "yellow", "red", "yellow", "brown", "blue", "blue",~
#> $ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, 57.0, ~
#> $ sex <chr> "male", "none", "none", "male", "female", "male", "female",~
#> $ gender <chr> "masculine", "masculine", "masculine", "masculine", "femini~
#> $ homeworld <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", "T~
#> $ species <chr> "Human", "Droid", "Droid", "Human", "Human", "Human", "Huma~
#> $ films <list> <"The Empire Strikes Back", "Revenge of the Sith", "Return~
#> $ vehicles <list> <"Snowspeeder", "Imperial Speeder Bike">, <>, <>, <>, "Imp~
#> $ starships <list> <"X-wing", "Imperial shuttle">, <>, <>, "TIE Advanced x1",~
This data set comes with the ggplot2 package. It contains the prices and other attributes of almost 54,000 diamonds.
# Diamonds data: price plus cut, color, clarity and size attributes.
data <- use_data_diamonds()
data %>% glimpse()
#> Rows: 53,940
#> Columns: 10
#> $ carat <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.~
#> $ cut <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver~
#> $ color <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,~
#> $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, ~
#> $ depth <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64~
#> $ table <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58~
#> $ price <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34~
#> $ x <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.~
#> $ y <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.~
#> $ z <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.~
This data set comes with base R. The data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica.
# Iris data: four flower measurements plus the species.
data <- use_data_iris()
data %>% glimpse()
#> Rows: 150
#> Columns: 5
#> $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.~
#> $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.~
#> $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.~
#> $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.~
#> $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s~
This data set comes with the ggplot2 package. It contains a subset of the fuel economy data that the EPA makes available on https://fueleconomy.gov/. It contains only models which had a new release every year between 1999 and 2008 - this was used as a proxy for the popularity of the car.
# Fuel economy data (mpg): one row per car model and configuration.
data <- use_data_mpg()
data %>% glimpse()
#> Rows: 234
#> Columns: 11
#> $ manufacturer <chr> "audi", "audi", "audi", "audi", "audi", "audi", "audi", "~
#> $ model <chr> "a4", "a4", "a4", "a4", "a4", "a4", "a4", "a4 quattro", "~
#> $ displ <dbl> 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.1, 1.8, 1.8, 2.0, 2.0, 2.~
#> $ year <int> 1999, 1999, 2008, 2008, 1999, 1999, 2008, 1999, 1999, 200~
#> $ cyl <int> 4, 4, 4, 4, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 8, 8, ~
#> $ trans <chr> "auto(l5)", "manual(m5)", "manual(m6)", "auto(av)", "auto~
#> $ drv <chr> "f", "f", "f", "f", "f", "f", "f", "4", "4", "4", "4", "4~
#> $ cty <int> 18, 21, 20, 21, 16, 18, 18, 18, 16, 20, 19, 15, 17, 17, 1~
#> $ hwy <int> 29, 29, 31, 30, 26, 26, 27, 26, 25, 28, 27, 25, 25, 25, 2~
#> $ fl <chr> "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p~
#> $ class <chr> "compact", "compact", "compact", "compact", "compact", "c~
This data set comes with base R. The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973–74 models).
# Motor Trend car data (mtcars): all columns are numeric.
data <- use_data_mtcars()
data %>% glimpse()
#> Rows: 32
#> Columns: 11
#> $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,~
#> $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,~
#> $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16~
#> $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180~
#> $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,~
#> $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.~
#> $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18~
#> $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,~
#> $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,~
#> $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,~
#> $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,~
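This data set comes with base R. It describes the survival of the people aboard the Titanic. With count = FALSE the data contains one row per person; with count = TRUE it contains one row per combination of Class, Sex, Age and Survived, with the number of people in column n.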
# Titanic data in long form (count = FALSE): no count column.
data <- use_data_titanic(count = FALSE)
data %>% glimpse()
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"~
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male~
#> $ Age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"~
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "~
# Titanic data in aggregated form (count = TRUE): adds the count column n.
data <- use_data_titanic(count = TRUE)
data %>% glimpse()
#> Rows: 32
#> Columns: 5
#> $ Class <chr> "1st", "2nd", "3rd", "Crew", "1st", "2nd", "3rd", "Crew", "1s~
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Female", "Female", "Female",~
#> $ Age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"~
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "~
#> $ n <dbl> 0, 0, 35, 0, 0, 0, 17, 0, 118, 154, 387, 670, 4, 13, 89, 3, 5~
This data set is an incomplete collection of popular beers in Austria, Germany and Switzerland. The data were collected from various websites in 2023. Some of the collected data may be incorrect.
# Beer data: name, brand, country, type and nutritional values per beer.
data <- use_data_beer()
data %>% glimpse()
#> Rows: 161
#> Columns: 11
#> $ name <chr> "Puntigamer Maerzen", "Puntigamer PR0,0ST", "Puntiga~
#> $ brand <chr> "Puntigamer", "Puntigamer", "Puntigamer", "Puntigame~
#> $ country <chr> "Austria", "Austria", "Austria", "Austria", "Austria~
#> $ year <dbl> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023~
#> $ type <chr> "Rest", "Alkoholfrei", "Rest", "Rest", "Rest", "Rest~
#> $ color_dark <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1~
#> $ alcohol_vol_pct <dbl> 5.1, 0.0, 5.2, 6.0, 4.9, 5.2, 4.4, 0.5, 5.7, 5.3, 7.~
#> $ original_wort <dbl> 11.5, 5.1, 12.1, 13.8, 11.5, 11.9, 11.1, 7.0, 13.2, ~
#> $ energy_kcal_100ml <dbl> 40, 20, 43, 50, 42, 43, 42, 27, 48, 45, 58, 45, 43, ~
#> $ carb_g_100ml <dbl> 2.7, 4.4, 2.9, 3.6, 3.2, 3.0, 3.8, 5.7, 3.5, 3.3, 3.~
#> $ sugar_g_100ml <dbl> 0.0, 1.2, 0.0, 0.0, 0.0, 0.0, 0.0, 2.7, 0.0, 0.0, 0.~
Artificial data that can be used for unit-testing or teaching.
# Artificial app data with 1000 observations.
data <- create_data_app(obs = 1000)
data %>% glimpse()
#> Rows: 1,000
#> Columns: 7
#> $ os <chr> "Android", "iOS", "Android", "iOS", "Other", "Android", "~
#> $ free <int> 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, ~
#> $ downloads <int> 5802, 5048, 4579, 3449, 2464, 11276, 4026, 6841, 10419, 5~
#> $ rating <dbl> 4, 4, 3, 4, 1, 4, 5, 5, 4, 1, 1, 4, 4, 5, 5, 4, 3, 4, 2, ~
#> $ type <chr> "Kids", "Media", "Other", "Shopping", "Connect", "Learn",~
#> $ updates <dbl> 63.00000, 58.00000, 62.00000, 44.00000, 24.00000, 75.0000~
#> $ screen_sizes <dbl> 3, 2, 3, 2, 1, 3, 1, 2, 2, 3, 1, 3, 2, 1, 3, 1, 4, 5, 3, ~
# Artificial customer data with 1000 observations and a 0/1 column buy.
data <- create_data_buy(obs = 1000)
data %>% glimpse()
#> Rows: 1,000
#> Columns: 13
#> $ period <int> 202012, 202012, 202012, 202012, 202012, 202012, 202012~
#> $ buy <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, ~
#> $ age <int> 39, 57, 55, 66, 71, 44, 64, 51, 70, 44, 58, 47, 68, 71~
#> $ city_ind <int> 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, ~
#> $ female_ind <int> 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, ~
#> $ fixedvoice_ind <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ~
#> $ fixeddata_ind <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
#> $ fixedtv_ind <int> 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, ~
#> $ mobilevoice_ind <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, ~
#> $ mobiledata_prd <chr> "NO", "NO", "MOBILE STICK", "NO", "BUSINESS", "BUSINES~
#> $ bbi_speed_ind <int> 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, ~
#> $ bbi_usg_gb <int> 77, 49, 53, 44, 55, 93, 50, 64, 63, 87, 45, 45, 70, 79~
#> $ hh_single <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, ~
# Artificial subscription data with 1000 observations and a 0/1 column churn.
data <- create_data_churn(obs = 1000)
data %>% glimpse()
#> Rows: 1,000
#> Columns: 9
#> $ price <dbl> 29, 27, 29, 11, 18, 21, 19, 13, 29, 22, 13, 27, 17, 11, 16,~
#> $ type <chr> "Premium", "Regular", "Premium", "Promo", "Promo", "Promo",~
#> $ usage <dbl> 63.0, 39.0, 87.0, 29.0, 22.5, 8.0, 56.0, 94.5, 46.0, 76.0, ~
#> $ shared <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,~
#> $ device <chr> "Computer", "Tablet", "Phone", "Tablet", "Computer", "Table~
#> $ newsletter <int> 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,~
#> $ language <chr> "sp", "sp", "sp", "sp", "en", "en", "fr", "en", "en", "de",~
#> $ duration <int> 7, 47, 99, 33, 94, 17, 95, 92, 43, 16, 62, 14, 52, 20, 76, ~
#> $ churn <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,~
# Artificial "esoteric" data (star sign, moon phase, ...) with 1000 observations.
data <- create_data_esoteric(obs = 1000)
data %>% glimpse()
#> Rows: 1,000
#> Columns: 6
#> $ starsign <chr> "Leo", "Aquarius", "Virgo", "Pisces", "Aries", "Taurus~
#> $ chinese <chr> "Dragon", "Monkey", "Tiger", "Pig", "Pig", "Horse", "D~
#> $ moon <chr> "Waxing (+)", "Waxing (+)", "Waxing (+)", "Waning (-)"~
#> $ blood <chr> "A+", "AB+", "0+", "0+", "A+", "0+", "B+", "0+", "0-",~
#> $ fingers_crossed <int> 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, ~
#> $ success <int> 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, ~
# Artificial person data (demographics and preferences) with 1000 observations.
data <- create_data_person(obs = 1000)
data %>% glimpse()
#> Rows: 1,000
#> Columns: 15
#> $ age <int> 46, 94, 66, 29, 82, 57, 65, 58, 29, 40, 84, 72, 24, ~
#> $ gender <chr> "Female", "Female", "Male", "Male", "Female", "Femal~
#> $ eye_color <chr> "Blue", "Green", "Brown", "Green", "Brown", "Brown",~
#> $ shoe_size <dbl> 45.2, 37.0, 45.0, 45.0, 39.0, 38.2, 41.2, 46.0, 40.0~
#> $ iq <dbl> 141, 71, 80, 74, 119, 95, 97, 135, 88, 140, 71, 126,~
#> $ education <int> 66, 41, 49, 49, 25, 68, 87, 46, 78, 14, 65, 62, 68, ~
#> $ income <dbl> 132.0, 95.0, 18.0, 54.0, 70.0, 128.0, 128.5, 32.0, 8~
#> $ handset <chr> "Apple", "Apple", "Apple", "Android", "Apple", "Andr~
#> $ pet <chr> "No", "Cat", "Other", "No", "Dog", "No", "Cat", "Dog~
#> $ favorite_pizza <chr> "Pepperoni", "Hawai", "Margaritha", "Carciofi", "Mar~
#> $ favorite_icecream <chr> "Lemon", "Strawberry", "Vanilla", "Vanilla", "Apple"~
#> $ likes_garlic <int> 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0~
#> $ likes_sushi <int> 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1~
#> $ likes_beatles <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0~
#> $ likes_beer <int> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1~
# Random data with 1000 observations: id, target_ind and var_1 ... var_10.
data <- create_data_random(obs = 1000)
data %>% glimpse()
#> Rows: 1,000
#> Columns: 12
#> $ id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ~
#> $ target_ind <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,~
#> $ var_1 <int> 27, 59, 16, 85, 85, 48, 77, 30, 7, 44, 46, 34, 19, 51, 2, 7~
#> $ var_2 <int> 16, 14, 15, 51, 49, 62, 45, 6, 1, 22, 85, 27, 60, 61, 99, 1~
#> $ var_3 <int> 21, 94, 38, 63, 18, 66, 73, 50, 87, 83, 98, 67, 64, 5, 19, ~
#> $ var_4 <int> 30, 83, 59, 81, 29, 14, 89, 1, 57, 97, 27, 98, 4, 26, 26, 9~
#> $ var_5 <int> 25, 99, 72, 65, 24, 9, 30, 54, 78, 27, 32, 95, 49, 97, 85, ~
#> $ var_6 <int> 44, 40, 37, 53, 7, 72, 24, 84, 100, 11, 49, 68, 82, 77, 43,~
#> $ var_7 <int> 93, 59, 8, 85, 3, 81, 39, 14, 67, 62, 45, 81, 87, 99, 40, 3~
#> $ var_8 <int> 58, 49, 74, 23, 75, 82, 10, 28, 2, 60, 99, 85, 59, 34, 65, ~
#> $ var_9 <int> 80, 88, 24, 56, 90, 1, 16, 26, 77, 7, 90, 31, 89, 61, 46, 7~
#> $ var_10 <int> 31, 32, 87, 33, 13, 36, 93, 88, 82, 2, 63, 78, 72, 19, 58, ~
# Artificial "unfair" data with 1000 observations and a 0/1 column target_ind.
data <- create_data_unfair(obs = 1000)
data %>% glimpse()
#> Rows: 1,000
#> Columns: 22
#> $ age <int> 46, 94, 66, 29, 82, 57, 65, 58, 29, 40, 84, 72, 24, 87, 41~
#> $ gender <chr> "Female", "Female", "Male", "Male", "Female", "Female", "F~
#> $ eye_color <chr> "Blue", "Green", "Blue", "Blue", "Blue", "Brown", "Brown",~
#> $ shoe_size <dbl> 45.2, 37.0, 45.0, 45.0, 39.0, 38.2, 41.2, 46.0, 40.0, 42.0~
#> $ iq <dbl> 141, 71, 80, 74, 119, 95, 97, 135, 88, 140, 71, 126, 106, ~
#> $ education <int> 66, 41, 49, 49, 25, 68, 87, 46, 78, 14, 65, 62, 68, 16, 69~
#> $ income <dbl> 132.0, 95.0, 18.0, 54.0, 70.0, 128.0, 128.5, 32.0, 82.0, 9~
#> $ handset <chr> "Apple", "Apple", "Apple", "Android", "Apple", "Android", ~
#> $ pet <chr> "Other", "Cat", "Cat", "Dog", "Cat", "No", "Dog", "No", "N~
#> $ smoking <int> 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0~
#> $ name_arabic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0~
#> $ outfit <chr> "Casual", "Casual", "Casual", "Alternative", "Elegant", "A~
#> $ glasses <int> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1~
#> $ tatoos <int> 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0~
#> $ kids <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0~
#> $ bad_debt <dbl> 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0~
#> $ credit_card <chr> "No", "Master", "Master", "No", "No", "Visa", "Visa", "Vis~
#> $ left_handed <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0~
#> $ skin_color <chr> "White", "Brown", "White", "White", "White", "White", "Bla~
#> $ religion <chr> "Christian", "No", "Christian", "No", "Christian", "No", "~
#> $ internet_gb <dbl> 0.000000, 60.609298, 260.437887, 55.199729, 0.000000, 179.~
#> $ target_ind <int> 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1~
Create an empty data set and add random variables.
# Start from an empty data set with 1000 observations, then append one
# randomly generated variable per pipeline step.
data <- create_data_empty(obs = 1000) %>%
  add_var_random_01("smoking", prob = c(0.8, 0.2)) %>%
  add_var_random_cat(
    "gender",
    cat = c("female", "male", "diverse"),
    prob = c(0.45, 0.45, 0.1)
  ) %>%
  add_var_random_dbl("internet_usage", min_val = 0, max_val = 1000) %>%
  add_var_random_int("age", min_val = 18, max_val = 100) %>%
  add_var_random_moon() %>%
  add_var_random_starsign()
data %>% glimpse()
#> Rows: 1,000
#> Columns: 6
#> $ smoking <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ~
#> $ gender <chr> "male", "male", "female", "male", "female", "female", ~
#> $ internet_usage <dbl> 923.7630, 979.0669, 773.8658, 697.6332, 470.4925, 609.~
#> $ age <int> 84, 54, 44, 45, 60, 73, 60, 74, 62, 46, 81, 95, 58, 19~
#> $ random_moon <chr> "Waxing (+)", "Waning (-)", "Waning (-)", "Waxing (+)"~
#> $ random_starsign <chr> "Saggitarius", "Saggitarius", "Libra", "Pisces", "Pisc~