Tidypredict with recipes

turning workflows

to SQL, spark, duckdb and beyond

You have your final model

What now?

prediction

library(tidymodels)
# NOTE(review): bonsai is presumably what supplies the "partykit" engine used
# by decision_tree() below - confirm.
library(bonsai)

# Fix the RNG seed before the random train/test split below.
set.seed(1234)

data(penguins, package = "modeldata")

# Drop rows with a missing outcome (body_mass_g), then split into training and
# test sets.
penguins_split <- initial_split(drop_na(penguins, body_mass_g))
penguins_train <- training(penguins_split)
penguins_test <- testing(penguins_split)

# Preprocessing recipe, applied in this order: recode missing nominal values,
# median-impute numeric predictors, dummy-encode nominal predictors, drop
# near-zero-variance columns, scale, center, then filter correlated predictors
# with a threshold of 0.5.
rec_spec <- recipe(body_mass_g ~ ., data = penguins_train) |>
  step_unknown(all_nominal_predictors()) |>
  step_impute_median(all_numeric_predictors()) |>
  step_dummy(all_nominal_predictors()) |>
  step_nzv(all_predictors()) |>
  step_scale(all_numeric_predictors()) |>
  step_center(all_numeric_predictors()) |>
  step_corr(all_predictors(), threshold = 0.5)

# Regression tree limited to depth 2, fitted through the "partykit" engine.
tree_spec <- decision_tree(tree_depth = 2) |>
  set_mode("regression") |>
  set_engine("partykit")

# Bundle recipe and model into one workflow and fit it on the training set.
wf_spec <- workflow(rec_spec, tree_spec)
wf_fit <- fit(wf_spec, penguins_train)

# Reference predictions from the fitted workflow on the held-out test set.
predict(wf_fit, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

{tidypredict}

Enables running predictions inside databases

  1. Parses the model
  2. Extracts sufficient information
  3. Creates an R formula that can be translated to SQL

{tidypredict} offers support for many types of models

  • Linear Regression - lm()
  • Generalized Linear model - glm()
  • Random Forest models - randomForest::randomForest()
  • Random Forest models, via ranger - ranger::ranger()
  • MARS models - earth::earth()
  • XGBoost models - xgboost::xgb.Booster.complete()
  • Cubist models - Cubist::cubist()
  • Tree models, via partykit - partykit::ctree()

Why this talk?

{tidypredict} is limited to a single equation, which:

  • makes recipes support very hard
  • leads to redundant calculations
  • makes classification probabilities awkward

How does it work?

Fitted party:
[1] root
|   [2] bill_length_mm <= -0.22359
|   |   [3] sex_male <= -0.92289: 3414.552 (n = 67, err = 8536436.6)
|   |   [4] sex_male > -0.92289: 3989.205 (n = 44, err = 5141747.2)
|   [5] bill_length_mm > -0.22359
|   |   [6] species_Chinstrap <= -0.48558: 4968.182 (n = 99, err = 26352272.7)
|   |   [7] species_Chinstrap > -0.48558: 3783.152 (n = 46, err = 6315067.9)

How does it work?

# The fitted tree written as nested if/else: the outer test is the root split
# on bill_length_mm, each inner test is a second-level split, and every
# return() value is the prediction of one leaf.
if (bill_length_mm <= -0.2235864) {
  if(sex_male <= -0.92289) {
    return(3414.552)
  } else {
    return(3989.205)
  }
} else {
  if (species_Chinstrap <= -0.48558) {
    return(4968.182)
  } else {
    return(3783.152)
  }
}

How does it work?

# The same tree flattened into one case_when(): each row combines the two
# conditions along a single root-to-leaf path and maps them to that leaf's
# prediction.
case_when(
  sex_male <= -0.9228935 & bill_length_mm <= -0.2235864 ~ 3414.552, 
  sex_male > -0.9228935 & bill_length_mm <= -0.2235864 ~ 3989.205,
  species_Chinstrap <= -0.4855824 & bill_length_mm > -0.2235864 ~ 4968.182,
  species_Chinstrap > -0.4855824 & bill_length_mm > -0.2235864 ~ 3783.152
)

How do workflows work?

rec_spec <- recipe(body_mass_g ~ ., data = penguins_train) |>
  step_unknown(all_nominal_predictors()) |>
  step_impute_median(all_numeric_predictors()) |>
  step_dummy(all_nominal_predictors()) |>
  step_nzv(all_predictors()) |>
  step_scale(all_numeric_predictors()) |>
  step_center(all_numeric_predictors()) |>
  step_corr(all_predictors(), threshold = 0.5)

tree_spec <- decision_tree(tree_depth = 2) |>
  set_mode("regression") |>
  set_engine("partykit")

wf_spec <- workflow(rec_spec, tree_spec)

species = dplyr::if_else(is.na(species), "unknown", species)
island = dplyr::if_else(is.na(island), "unknown", island)
sex = dplyr::if_else(is.na(sex), "unknown", sex)
bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.95, bill_length_mm)
bill_depth_mm = dplyr::if_else(is.na(bill_depth_mm), 17.3, bill_depth_mm)
flipper_length_mm = dplyr::if_else(is.na(flipper_length_mm), 198, flipper_length_mm)
species_Adelie = as.numeric(species == "Adelie")
species_Chinstrap = as.numeric(species == "Chinstrap")
species_Gentoo = as.numeric(species == "Gentoo")
species_unknown = as.numeric(species == "unknown")
island_Biscoe = as.numeric(island == "Biscoe")
island_Dream = as.numeric(island == "Dream")
island_Torgersen = as.numeric(island == "Torgersen")
island_unknown = as.numeric(island == "unknown")
sex_female = as.numeric(sex == "female")
sex_male = as.numeric(sex == "male")
sex_unknown = as.numeric(sex == "unknown")
bill_length_mm = bill_length_mm / 5.451399
bill_depth_mm = bill_depth_mm / 1.98353
flipper_length_mm = flipper_length_mm / 14.04647
species_Chinstrap = species_Chinstrap / 0.4001953
species_Gentoo = species_Gentoo / 0.4840438
island_Dream = island_Dream / 0.4795927
island_Torgersen = island_Torgersen / 0.3562296
sex_male = sex_male / 0.5002297
bill_length_mm = bill_length_mm - 8.04911
bill_depth_mm = bill_depth_mm - 8.628085
flipper_length_mm = flipper_length_mm - 14.32021
species_Chinstrap = species_Chinstrap - 0.4978039
species_Gentoo = species_Gentoo - 0.7666532
island_Dream = island_Dream - 0.7411888
island_Torgersen = island_Torgersen - 0.4166906
sex_male = sex_male - 0.9448784

.pred = case_when(sex_male <= -0.9448784 & bill_length_mm <= -0.2162576 ~ 3425.781, sex_male > -0.9448784 & bill_length_mm <= -0.2162576 ~ 4028.804, species_Chinstrap <= -0.4978039 & bill_length_mm > -0.2162576 ~ 5008.838, species_Chinstrap > -0.4978039 & bill_length_mm > -0.2162576 ~ 3786.702)

step_unknown()

step_impute_median()

step_dummy()

step_scale()

step_center()

species = dplyr::if_else(is.na(species), "unknown", species)

island = dplyr::if_else(is.na(island), "unknown", island)

sex = dplyr::if_else(is.na(sex), "unknown", sex)

bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.95, bill_length_mm)

bill_depth_mm = dplyr::if_else(is.na(bill_depth_mm), 17.3, bill_depth_mm)

flipper_length_mm = dplyr::if_else(is.na(flipper_length_mm), 198, flipper_length_mm)

species_Adelie = as.numeric(species == "Adelie")

species_Chinstrap = as.numeric(species == "Chinstrap")

species_Gentoo = as.numeric(species == "Gentoo")

species_unknown = as.numeric(species == "unknown")

island_Biscoe = as.numeric(island == "Biscoe")

island_Dream = as.numeric(island == "Dream")

island_Torgersen = as.numeric(island == "Torgersen")

island_unknown = as.numeric(island == "unknown")

sex_female = as.numeric(sex == "female")

sex_male = as.numeric(sex == "male")

sex_unknown = as.numeric(sex == "unknown")

bill_length_mm = bill_length_mm / 5.451399

bill_depth_mm = bill_depth_mm / 1.98353

flipper_length_mm = flipper_length_mm / 14.04647

species_Chinstrap = species_Chinstrap / 0.4001953

species_Gentoo = species_Gentoo / 0.4840438

island_Dream = island_Dream / 0.4795927

island_Torgersen = island_Torgersen / 0.3562296

sex_male = sex_male / 0.5002297

bill_length_mm = bill_length_mm - 8.04911

bill_depth_mm = bill_depth_mm - 8.628085

flipper_length_mm = flipper_length_mm - 14.32021

species_Chinstrap = species_Chinstrap - 0.4978039

species_Gentoo = species_Gentoo - 0.7666532

island_Dream = island_Dream - 0.7411888

island_Torgersen = island_Torgersen - 0.4166906

sex_male = sex_male - 0.9448784

.pred = case_when(sex_male <= -0.9448784 & bill_length_mm <= -0.2162576 ~ 3425.781, sex_male > -0.9448784 & bill_length_mm <= -0.2162576 ~ 4028.804, species_Chinstrap <= -0.4978039 & bill_length_mm > -0.2162576 ~ 5008.838, species_Chinstrap > -0.4978039 & bill_length_mm > -0.2162576 ~ 3786.702)

species = dplyr::if_else(is.na(species), "unknown", species)

sex = dplyr::if_else(is.na(sex), "unknown", sex)

bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.95, bill_length_mm)

species_Chinstrap = as.numeric(species == "Chinstrap")

sex_male = as.numeric(sex == "male")

bill_length_mm = bill_length_mm / 5.451399

species_Chinstrap = species_Chinstrap / 0.4001953

sex_male = sex_male / 0.5002297

bill_length_mm = bill_length_mm - 8.04911

species_Chinstrap = species_Chinstrap - 0.4978039

sex_male = sex_male - 0.9448784

.pred = case_when(sex_male <= -0.9448784 & bill_length_mm <= -0.2162576 ~ 3425.781, sex_male > -0.9448784 & bill_length_mm <= -0.2162576 ~ 4028.804, species_Chinstrap <= -0.4978039 & bill_length_mm > -0.2162576 ~ 5008.838, species_Chinstrap > -0.4978039 & bill_length_mm > -0.2162576 ~ 3786.702)

using orbital

Use the main function orbital() on a fitted workflow

library(orbital)

# Convert the fitted workflow into an orbital object, then print it.
orbital_obj <- orbital(wf_fit)
orbital_obj
── orbital Object ────────────────────────────────────────────────────────────────────────────────
• species = dplyr::if_else(is.na(species), "unknown", species)
• sex = dplyr::if_else(is.na(sex), "unknown", sex)
• bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.5, bill_length_mm)
• species_Chinstrap = as.numeric(species == "Chinstrap")
• sex_male = as.numeric(sex == "male")
• bill_length_mm = bill_length_mm / 5.478623
• species_Chinstrap = species_Chinstrap / 0.4001953
• sex_male = sex_male / 0.5002297
• bill_length_mm = bill_length_mm - 8.012036
• species_Chinstrap = species_Chinstrap - 0.4978039
• sex_male = sex_male - 0.9448784
• .pred = case_when(sex_male <= -0.9448784 & bill_length_mm <= -0.1268424 ~ 3467.958, sex_m ...
──────────────────────────────────────────────────────────────────────────────────────────────────
12 equations in total.

Prediction

Code Generation

Prediction

predict(orbital_obj, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

predict(wf_fit, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows
predict(orbital_obj, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

predicting with tibble

new_penguins <- penguins_test

predict(orbital_obj, new_penguins)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

predicting with SQL

library(dbplyr)
library(RSQLite)
library(DBI)

# RSQLite selects the database through `dbname`; ":memory:" requests a
# temporary in-memory database.
con <- dbConnect(SQLite(), dbname = ":memory:")
# Upload the held-out penguins so the prediction runs inside SQLite.
new_penguins <- copy_to(con, penguins_test)

predict(orbital_obj, new_penguins)
# Source:   SQL [?? x 1]
# Database: sqlite 3.46.0 [:memory:]
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ more rows

predicting with spark

library(sparklyr)

# Connect to a local Spark instance and copy the test set into it.
con <- spark_connect(master = "local")
new_penguins <- copy_to(con, penguins_test)

# new_penguins now refers to the copy held by the Spark connection.
predict(orbital_obj, new_penguins)
# Source:   SQL [?? x 1]
# Database: spark_connection
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ more rows

predicting with arrow

library(arrow)

# Convert the test set to an Arrow table before predicting.
new_penguins <- as_arrow_table(penguins_test)

predict(orbital_obj, new_penguins)
Table (query)
.pred: double (case_when({1=((subtract_checked(divide(cast(cast((if_else(is_null(sex, {nan_is_null=true}), [
  "unknown"
][0], sex) == [
  "male"
][0]), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(0.500841815855869, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 1.02171730434597) <= -1.02171730434597) and (subtract_checked(divide(cast(if_else(is_null(bill_length_mm, {nan_is_null=true}), 43.3, bill_length_mm), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(5.53764488534842, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 7.89221731979121) <= -0.217474558216316)), 2=((subtract_checked(divide(cast(cast((if_else(is_null(sex, {nan_is_null=true}), [
  "unknown"
][0], sex) == [
  "male"
][0]), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(0.500841815855869, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 1.02171730434597) > -1.02171730434597) and (subtract_checked(divide(cast(if_else(is_null(bill_length_mm, {nan_is_null=true}), 43.3, bill_length_mm), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(5.53764488534842, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 7.89221731979121) <= -0.217474558216316)), 3=((subtract_checked(divide(cast(cast((if_else(is_null(species, {nan_is_null=true}), [
  "unknown"
][0], species) == [
  "Chinstrap"
][0]), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(0.39721765160312, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 0.491701461935901) <= -0.491701461935901) and (subtract_checked(divide(cast(if_else(is_null(bill_length_mm, {nan_is_null=true}), 43.3, bill_length_mm), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(5.53764488534842, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 7.89221731979121) > -0.217474558216316)), 4=((subtract_checked(divide(cast(cast((if_else(is_null(species, {nan_is_null=true}), [
  "unknown"
][0], species) == [
  "Chinstrap"
][0]), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(0.39721765160312, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 0.491701461935901) > -0.491701461935901) and (subtract_checked(divide(cast(if_else(is_null(bill_length_mm, {nan_is_null=true}), 43.3, bill_length_mm), {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false}), cast(5.53764488534842, {to_type=double, allow_int_overflow=false, allow_time_truncate=false, allow_time_overflow=false, allow_decimal_truncate=false, allow_float_truncate=false, allow_invalid_utf8=false})), 7.89221731979121) > -0.217474558216316))}, 3394.140625, 3985.09615384615, 4980.37634408602, 3757.97872340426))

See $.data for the source Arrow object

predicting with duckdb

library(duckdb)

# Open a DuckDB connection with dbdir = ":memory:" and copy the test set into it.
con <- dbConnect(duckdb(dbdir = ":memory:"))
new_penguins <- copy_to(con, penguins_test)

predict(orbital_obj, new_penguins)
# Source:   SQL [?? x 1]
# Database: DuckDB v1.0.0 [root@Darwin 23.5.0:R 4.4.0/:memory:]
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ more rows

Prediction

Code Generation

Code Generation

Exporting SQL

# RSQLite selects the database through `dbname`; ":memory:" requests a
# temporary in-memory database.
con <- dbConnect(SQLite(), dbname = ":memory:")
# Translate each orbital equation into SQL for this connection.
orbital_sql(orbital_obj, con)
<SQL> CASE WHEN ((`species` IS NULL)) THEN 'unknown' WHEN NOT ((`species` IS NULL)) THEN `species` END AS species
<SQL> CASE WHEN ((`sex` IS NULL)) THEN 'unknown' WHEN NOT ((`sex` IS NULL)) THEN `sex` END AS sex
<SQL> CASE WHEN ((`bill_length_mm` IS NULL)) THEN 43.3 WHEN NOT ((`bill_length_mm` IS NULL)) THEN `bill_length_mm` END AS bill_length_mm
<SQL> CAST(`species` = 'Chinstrap' AS REAL) AS species_Chinstrap
<SQL> CAST(`sex` = 'male' AS REAL) AS sex_male
<SQL> `bill_length_mm` / 5.53764488534842 AS bill_length_mm
<SQL> `species_Chinstrap` / 0.39721765160312 AS species_Chinstrap
<SQL> `sex_male` / 0.500841815855869 AS sex_male
<SQL> `bill_length_mm` - 7.89221731979121 AS bill_length_mm
<SQL> `species_Chinstrap` - 0.491701461935901 AS species_Chinstrap
<SQL> `sex_male` - 1.02171730434597 AS sex_male
<SQL> CASE
WHEN (`sex_male` <= -1.02171730434597 AND `bill_length_mm` <= -0.217474558216316) THEN 3394.140625
WHEN (`sex_male` > -1.02171730434597 AND `bill_length_mm` <= -0.217474558216316) THEN 3985.09615384615
WHEN (`species_Chinstrap` <= -0.491701461935901 AND `bill_length_mm` > -0.217474558216316) THEN 4980.37634408602
WHEN (`species_Chinstrap` > -0.491701461935901 AND `bill_length_mm` > -0.217474558216316) THEN 3757.97872340426
END AS .pred

shiny app

shiny app - function

# Export the orbital object as R code to shiny-fun.R.
# NOTE(review): the listing below is presumably the content written to that
# file - confirm.
orbital_r_fun(orbital_obj, file = "shiny-fun.R")
# Evaluates the prediction equations with the columns of `x` in scope.
# x: data frame providing the species, sex and bill_length_mm columns.
# Returns the resulting .pred vector.
# case_when() is called unqualified, so dplyr must be attached by the caller.
orbital_predict <- function(x) {
with(x, {
   species = dplyr::if_else(is.na(species), "unknown", species)
   sex = dplyr::if_else(is.na(sex), "unknown", sex)
   bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 43.3, bill_length_mm)
   species_Chinstrap = as.numeric(species == "Chinstrap")
   sex_male = as.numeric(sex == "male")
   bill_length_mm = bill_length_mm / 5.53764488534842
   species_Chinstrap = species_Chinstrap / 0.39721765160312
   sex_male = sex_male / 0.500841815855869
   bill_length_mm = bill_length_mm - 7.89221731979121
   species_Chinstrap = species_Chinstrap - 0.491701461935901
   sex_male = sex_male - 1.02171730434597
   .pred = case_when(sex_male <= -1.02171730434597 & bill_length_mm <= -0.217474558216316 ~ 3394.140625, sex_male > -1.02171730434597 & bill_length_mm <= -0.217474558216316 ~ 3985.09615384615, species_Chinstrap <= -0.491701461935901 & bill_length_mm > -0.217474558216316 ~ 4980.37634408602, species_Chinstrap > -0.491701461935901 & bill_length_mm > -0.217474558216316 ~ 3757.97872340426)
  .pred
  })
}

shiny app - UI

library(shiny)

# One input widget per predictor column; each input id is the column name.
selectInput("species", "species:", c("Adelie", "Gentoo", "Chinstrap"), "Adelie")
selectInput("island", "island:", c("Torgersen", "Biscoe", "Dream"), "Dream")
sliderInput("bill_length_mm", "bill_length_mm:", min = 1, max = 50, value = 20)
sliderInput("bill_depth_mm", "bill_depth_mm:", min = 1, max = 50, value = 20)
sliderInput("flipper_length_mm", "flipper_length_mm:", min = 1, max = 300, value = 20)
selectInput("sex", "sex:", c("male", "female", NA), "male")

# Text output slot with the id "weight".
textOutput("weight")

shiny app - server

library(dplyr)
source("shiny-fun.R")

# Gather the current widget values into a data frame and render the value
# returned by orbital_predict() as the "weight" text output.
output$weight <- renderText({
  new_penguin <- data.frame(
    species = input$species,
    island = input$island,
    bill_length_mm = input$bill_length_mm,
    bill_depth_mm = input$bill_depth_mm,
    flipper_length_mm = input$flipper_length_mm,
    sex = input$sex
  )

  orbital_predict(new_penguin)
})

#| standalone: true
library(shiny)
library(bslib)
library(dplyr)

# Evaluates the prediction equations with the columns of `x` in scope.
# x: data frame providing the species, sex and bill_length_mm columns.
# Returns the resulting .pred vector.
# case_when() is resolved through the dplyr attached above.
orbital_predict <- function(x) {
  with(x, {
    species <- dplyr::if_else(is.na(species), "unknown", species)
    sex <- dplyr::if_else(is.na(sex), "unknown", sex)
    bill_length_mm <- dplyr::if_else(is.na(bill_length_mm), 43.3, bill_length_mm)
    species_Chinstrap <- as.numeric(species == "Chinstrap")
    sex_male <- as.numeric(sex == "male")
    bill_length_mm <- bill_length_mm / 5.53764488534842
    species_Chinstrap <- species_Chinstrap / 0.39721765160312
    sex_male <- sex_male / 0.500841815855869
    bill_length_mm <- bill_length_mm - 7.89221731979121
    species_Chinstrap <- species_Chinstrap - 0.491701461935901
    sex_male <- sex_male - 1.02171730434597
    .pred <- case_when(
      sex_male <= -1.02171730434597 & bill_length_mm <= -0.217474558216316 ~ 3394.140625,
      sex_male > -1.02171730434597 & bill_length_mm <= -0.217474558216316 ~ 3985.09615384615,
      species_Chinstrap <= -0.491701461935901 & bill_length_mm > -0.217474558216316 ~ 4980.37634408602,
      species_Chinstrap > -0.491701461935901 & bill_length_mm > -0.217474558216316 ~ 3757.97872340426
    )
    .pred
  })
}

# Input widgets, grouped into the three cards laid out in columns.
origin_card <- card(
  selectInput("species", "species:", c("Adelie", "Gentoo", "Chinstrap"), "Adelie"),
  selectInput("island", "island:", c("Torgersen", "Biscoe", "Dream"), "Dream")
)

body_card <- card(
  selectInput("sex", "sex:", c("male", "female", NA), "male"),
  sliderInput("flipper_length_mm", "flipper_length_mm:", min = 1, max = 300, value = 20)
)

bill_card <- card(
  sliderInput("bill_length_mm", "bill_length_mm:", min = 1, max = 50, value = 20),
  sliderInput("bill_depth_mm", "bill_depth_mm:", min = 1, max = 50, value = 20)
)

# Page layout: the input cards on top, the prediction card below, followed by
# the style overrides.
ui <- page_fillable(
  layout_columns(origin_card, body_card, bill_card),
  card(
    card_header("Predicted Weight"),
    textOutput("weight")
  ),
  tags$head(tags$style("body {font-size: 36px;}")),
  tags$head(tags$style("div.card-header {font-size: 36px !important;}")),
  tags$head(tags$style(".shiny-text-output {color:#E21285; font-size: 100px; text-align: center;}"))
)

# Server: gather the current widget values into a data frame and render the
# value returned by orbital_predict() as the "weight" text output.
server <- function(input, output) {
  output$weight <- renderText({
    new_penguin <- data.frame(
      species = input$species,
      island = input$island,
      bill_length_mm = input$bill_length_mm,
      bill_depth_mm = input$bill_depth_mm,
      flipper_length_mm = input$flipper_length_mm,
      sex = input$sex
    )

    orbital_predict(new_penguin)
  })
}

shinyApp(ui = ui, server = server)

Pure Javascript prediction

Why use this package?

  • Cons
    • not all models and recipes are supported
    • you don’t get any input checking
    • very new, be careful
  • Pros
    • much smaller Docker containers, or none at all
    • predictions in databases
    • code generation

Thank you!