Stop Horsing Around

Ship the Query!

using orbital to run predictions in the database

library(tidymodels)
library(bonsai)

set.seed(1234)

data(penguins, package = "modeldata")

penguins_split <- initial_split(drop_na(penguins, body_mass_g))
penguins_train <- training(penguins_split)
penguins_test <- testing(penguins_split)

rec_spec <- recipe(body_mass_g ~ ., data = penguins_train) |>
  step_unknown(all_nominal_predictors()) |>
  step_impute_median(all_numeric_predictors()) |>
  step_dummy(all_nominal_predictors()) |>
  step_nzv(all_predictors()) |>
  step_scale(all_numeric_predictors()) |>
  step_center(all_numeric_predictors()) |>
  step_corr(all_predictors(), threshold = 0.5)

tree_spec <- decision_tree(tree_depth = 2) |>
  set_mode("regression") |>
  set_engine("partykit")

wf_spec <- workflow(rec_spec, tree_spec)
wf_fit <- fit(wf_spec, penguins_train)

You have your final model

What now?

prediction

predict(wf_fit, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

{tidypredict}

Enables running predictions inside databases

  1. Parses the model
  2. Extracts sufficient information
  3. Creates an R formula that can be translated to SQL

{tidypredict} offers support for many types of models

  • Linear Regression - lm()
  • Generalized Linear model - glm()
  • Random Forest models - randomForest::randomForest()
  • Random Forest models, via ranger - ranger::ranger()
  • MARS models - earth::earth()
  • XGBoost models - xgboost::xgb.Booster.complete()
  • Cubist models - Cubist::cubist()
  • Tree models, via partykit - partykit::ctree()

Why this talk?

{tidypredict} is limited to 1 equation which makes:

  • pre and post processing very hard
  • for redundant calculations
  • classification probabilities awkward

How does it work?

Fitted party:
[1] root
|   [2] bill_length_mm <= -0.22359
|   |   [3] sex_male <= -0.92289: 3414.552 (n = 67, err = 8536436.6)
|   |   [4] sex_male > -0.92289: 3989.205 (n = 44, err = 5141747.2)
|   [5] bill_length_mm > -0.22359
|   |   [6] species_Chinstrap <= -0.48558: 4968.182 (n = 99, err = 26352272.7)
|   |   [7] species_Chinstrap > -0.48558: 3783.152 (n = 46, err = 6315067.9)

How does it work?

if (bill_length_mm <= -0.2235864) {
  if(sex_male <= -0.92289) {
    return(3414.552)
  } else {
    return(3989.205)
  }
} else {
  if (species_Chinstrap <= -0.48558) {
    return(4968.182)
  } else {
    return(3783.152)
  }
}

How does it work?

case_when(
  sex_male <= -0.9228935 & bill_length_mm <= -0.2235864 ~ 3414.552, 
  sex_male > -0.9228935 & bill_length_mm <= -0.2235864 ~ 3989.205,
  species_Chinstrap <= -0.4855824 & bill_length_mm > -0.2235864 ~ 4968.182,
  species_Chinstrap > -0.4855824 & bill_length_mm > -0.2235864 ~ 3783.152
)

How do workflows work?

rec_spec <- recipe(body_mass_g ~ ., data = penguins_train) |>
  step_unknown(all_nominal_predictors()) |>
  step_impute_median(all_numeric_predictors()) |>
  step_dummy(all_nominal_predictors()) |>
  step_nzv(all_predictors()) |>
  step_scale(all_numeric_predictors()) |>
  step_center(all_numeric_predictors()) |>
  step_corr(all_predictors(), threshold = 0.5)

tree_spec <- decision_tree(tree_depth = 2) |>
  set_mode("regression") |>
  set_engine("partykit")

wf_spec <- workflow(rec_spec, tree_spec)

species = dplyr::if_else(is.na(species), "unknown", species)
island = dplyr::if_else(is.na(island), "unknown", island)
sex = dplyr::if_else(is.na(sex), "unknown", sex)
bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.95, bill_length_mm)
bill_depth_mm = dplyr::if_else(is.na(bill_depth_mm), 17.3, bill_depth_mm)
flipper_length_mm = dplyr::if_else(is.na(flipper_length_mm), 198, flipper_length_mm)
species_Adelie = as.numeric(species == "Adelie")
species_Chinstrap = as.numeric(species == "Chinstrap")
species_Gentoo = as.numeric(species == "Gentoo")
species_unknown = as.numeric(species == "unknown")
island_Biscoe = as.numeric(island == "Biscoe")
island_Dream = as.numeric(island == "Dream")
island_Torgersen = as.numeric(island == "Torgersen")
island_unknown = as.numeric(island == "unknown")
sex_female = as.numeric(sex == "female")
sex_male = as.numeric(sex == "male")
sex_unknown = as.numeric(sex == "unknown")
bill_length_mm = bill_length_mm / 5.451399
bill_depth_mm = bill_depth_mm / 1.98353
flipper_length_mm = flipper_length_mm / 14.04647
species_Chinstrap = species_Chinstrap / 0.4001953
species_Gentoo = species_Gentoo / 0.484043
island_Dream = island_Dream / 0.4795927
island_Torgersen = island_Torgersen / 0.3562296
sex_male = sex_male / 0.5002297
bill_length_mm = bill_length_mm - 8.04911
bill_depth_mm = bill_depth_mm - 8.628085
flipper_length_mm = flipper_length_mm - 14.32021
species_Chinstrap = species_Chinstrap - 0.4978039
species_Gentoo = species_Gentoo - 0.7666532
island_Dream = island_Dream - 0.7411888
island_Torgersen = island_Torgersen - 0.4166906
sex_male = sex_male - 0.9448784

.pred = case_when(sex_male <= -0.9448784 & bill_length_mm <= -0.2162576 ~ 3425.781, sex_male > -0.9448784 & bill_length_mm <= -0.2162576 ~ 4028.804, species_Chinstrap <= -0.4978039 & bill_length_mm > -0.2162576 ~ 5008.838, species_Chinstrap > -0.4978039 & bill_length_mm > -0.2162576 ~ 3786.702)

step_unknown()

step_impute_median()

step_dummy()

step_scale()

step_center()

species = dplyr::if_else(is.na(species), "unknown", species)

island = dplyr::if_else(is.na(island), "unknown", island)

sex = dplyr::if_else(is.na(sex), "unknown", sex)

bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.95, bill_length_mm)

bill_depth_mm = dplyr::if_else(is.na(bill_depth_mm), 17.3, bill_depth_mm)

flipper_length_mm = dplyr::if_else(is.na(flipper_length_mm), 198, flipper_length_mm)

species_Adelie = as.numeric(species == "Adelie")

species_Chinstrap = as.numeric(species == "Chinstrap")

species_Gentoo = as.numeric(species == "Gentoo")

species_unknown = as.numeric(species == "unknown")

island_Biscoe = as.numeric(island == "Biscoe")

island_Dream = as.numeric(island == "Dream")

island_Torgersen = as.numeric(island == "Torgersen")

island_unknown = as.numeric(island == "unknown")

sex_female = as.numeric(sex == "female")

sex_male = as.numeric(sex == "male")

sex_unknown = as.numeric(sex == "unknown")

bill_length_mm = bill_length_mm / 5.451399

bill_depth_mm = bill_depth_mm / 1.98353

flipper_length_mm = flipper_length_mm / 14.04647

species_Chinstrap = species_Chinstrap / 0.4001953

species_Gentoo = species_Gentoo / 0.4840438

island_Dream = island_Dream / 0.4795927

island_Torgersen = island_Torgersen / 0.3562296

sex_male = sex_male / 0.5002297

bill_length_mm = bill_length_mm - 8.04911

bill_depth_mm = bill_depth_mm - 8.628085

flipper_length_mm = flipper_length_mm - 14.32021

species_Chinstrap = species_Chinstrap - 0.4978039

species_Gentoo = species_Gentoo - 0.7666532

island_Dream = island_Dream - 0.7411888

island_Torgersen = island_Torgersen - 0.4166906

sex_male = sex_male - 0.9448784

.pred = case_when(sex_male <= -0.9448784 & bill_length_mm <= -0.2162576 ~ 3425.781, sex_male > -0.9448784 & bill_length_mm <= -0.2162576 ~ 4028.804, species_Chinstrap <= -0.4978039 & bill_length_mm > -0.2162576 ~ 5008.838, species_Chinstrap > -0.4978039 & bill_length_mm > -0.2162576 ~ 3786.702)

species = dplyr::if_else(is.na(species), "unknown", species)

sex = dplyr::if_else(is.na(sex), "unknown", sex)

bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.95, bill_length_mm)

species_Chinstrap = as.numeric(species == "Chinstrap")

sex_male = as.numeric(sex == "male")

bill_length_mm = bill_length_mm / 5.451399

species_Chinstrap = species_Chinstrap / 0.4001953

sex_male = sex_male / 0.5002297

bill_length_mm = bill_length_mm - 8.04911

species_Chinstrap = species_Chinstrap - 0.4978039

sex_male = sex_male - 0.9448784

.pred = case_when(sex_male <= -0.9448784 & bill_length_mm <= -0.2162576 ~ 3425.781, sex_male > -0.9448784 & bill_length_mm <= -0.2162576 ~ 4028.804, species_Chinstrap <= -0.4978039 & bill_length_mm > -0.2162576 ~ 5008.838, species_Chinstrap > -0.4978039 & bill_length_mm > -0.2162576 ~ 3786.702)

using orbital

Use the main function orbital() on a fitted workflow

library(orbital)

orbital_obj <- orbital(wf_fit)
orbital_obj
── orbital Object ────────────────────────────────────────────────────────────────────────────────
• species = dplyr::if_else(is.na(species), "unknown", species)
• sex = dplyr::if_else(is.na(sex), "unknown", sex)
• bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 44.5, bill_length_mm)
• species_Chinstrap = as.numeric(species == "Chinstrap")
• sex_male = as.numeric(sex == "male")
• bill_length_mm = bill_length_mm / 5.478623
• species_Chinstrap = species_Chinstrap / 0.4001953
• sex_male = sex_male / 0.5002297
• bill_length_mm = bill_length_mm - 8.012036
• species_Chinstrap = species_Chinstrap - 0.4978039
• sex_male = sex_male - 0.9448784
• .pred = dplyr::case_when(sex_male <= -1.021717 & bill_length_mm <= -0.2174746 ~ 3394.141, 
sex_male > -1.021717 & bill_length_mm <= -0.2174746 ~ 3985.096, species_Chinstrap <= -0.4917015 & 
bill_length_mm > -0.2174746 ~ 4980.376, species_Chinstrap > -0.4917015 & bill_length_mm > 
-0.2174746 ~ 3757.979)
──────────────────────────────────────────────────────────────────────────────────────────────────
12 equations in total.

Redundancies in single expression

.pred = dplyr::case_when(
  as.numeric(dplyr::if_else(is.na(sex), "unknown", sex) == "male") /
    0.5008418 -
    1.021717 <=
    -1.021717 &
    dplyr::if_else(is.na(bill_length_mm), 43.3, bill_length_mm) /
      5.537645 -
      7.892217 <=
      -0.2174746 ~
    3394.141,
  as.numeric(dplyr::if_else(is.na(sex), "unknown", sex) == "male") /
    0.5008418 -
    1.021717 >
    -1.021717 &
    dplyr::if_else(is.na(bill_length_mm), 43.3, bill_length_mm) /
      5.537645 -
      7.892217 <=
      -0.2174746 ~
    3985.096,
  as.numeric(
    dplyr::if_else(is.na(species), "unknown", species) == "Chinstrap"
  ) / 0.3972177 -
    0.4917015 <=
    -0.4917015 &
    dplyr::if_else(is.na(bill_length_mm), 43.3, bill_length_mm) /
      5.537645 -
      7.892217 >
      -0.2174746 ~
    4980.376,
  as.numeric(
    dplyr::if_else(is.na(species), "unknown", species) == "Chinstrap"
  ) /
    0.3972177 -
    0.4917015 >
    -0.4917015 &
    dplyr::if_else(is.na(bill_length_mm), 43.3, bill_length_mm) /
      5.537645 -
      7.892217 >
      -0.2174746 ~
    3757.979
)

Prediction

Code Generation

Prediction

predict(orbital_obj, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

predict(wf_fit, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows
predict(orbital_obj, penguins_test)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

predicting with tibble

new_penguins <- penguins_test

predict(orbital_obj, new_penguins)
# A tibble: 86 × 1
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ 76 more rows

predicting with SQL

library(dbplyr)
library(RSQLite)
library(DBI)

con <- dbConnect(SQLite(), path = ":memory:")
new_penguins <- copy_to(con, penguins_test)

predict(orbital_obj, new_penguins)
# Source:   SQL [?? x 1]
# Database: sqlite 3.50.1 []
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ more rows

predicting with spark

library(sparklyr)

con <- spark_connect(master = "local")
new_penguins <- copy_to(con, penguins_test)

predict(orbital_obj, new_penguins)
# Source:   SQL [?? x 1]
# Database: spark_connection
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ more rows

predicting with duckdb

library(duckdb)

con <- dbConnect(duckdb(dbdir = ":memory:"))
new_penguins <- copy_to(con, penguins_test)

predict(orbital_obj, new_penguins)
# Source:   SQL [?? x 1]
# Database: DuckDB v1.2.1 [root@Darwin 24.6.0:R 4.5.1/:memory:]
   .pred
   <dbl>
 1 3985.
 2 3394.
 3 3394.
 4 3394.
 5 3394.
 6 3985.
 7 3394.
 8 3394.
 9 3394.
10 3985.
# ℹ more rows

Prediction

Code Generation

Code Generation

Exporting SQL

con <- dbConnect(SQLite(), path = ":memory:")
orbital_sql(orbital_obj, con)
<SQL> CASE WHEN ((`species` IS NULL)) THEN 'unknown' WHEN NOT ((`species` IS NULL)) THEN `species` END AS species
<SQL> CASE WHEN ((`sex` IS NULL)) THEN 'unknown' WHEN NOT ((`sex` IS NULL)) THEN `sex` END AS sex
<SQL> CASE WHEN ((`bill_length_mm` IS NULL)) THEN 43.3 WHEN NOT ((`bill_length_mm` IS NULL)) THEN `bill_length_mm` END AS bill_length_mm
<SQL> CAST(`species` = 'Chinstrap' AS REAL) AS species_Chinstrap
<SQL> CAST(`sex` = 'male' AS REAL) AS sex_male
<SQL> `bill_length_mm` / 5.53764488534842 AS bill_length_mm
<SQL> `species_Chinstrap` / 0.39721765160312 AS species_Chinstrap
<SQL> `sex_male` / 0.500841815855869 AS sex_male
<SQL> `bill_length_mm` - 7.89221731979121 AS bill_length_mm
<SQL> `species_Chinstrap` - 0.491701461935901 AS species_Chinstrap
<SQL> `sex_male` - 1.02171730434597 AS sex_male
<SQL> CASE
WHEN (`sex_male` <= -1.02171730434597 AND `bill_length_mm` <= -0.217474558216316) THEN 3394.140625
WHEN (`sex_male` > -1.02171730434597 AND `bill_length_mm` <= -0.217474558216316) THEN 3985.09615384615
WHEN (`species_Chinstrap` <= -0.491701461935901 AND `bill_length_mm` > -0.217474558216316) THEN 4980.37634408602
WHEN (`species_Chinstrap` > -0.491701461935901 AND `bill_length_mm` > -0.217474558216316) THEN 3757.97872340426
END AS .pred

Why use this package?

Cons

  • not all models and recipes are supported
  • don’t get any input checking

Pros

  • much smaller Docker containers, or none needed at all
  • predictions in databases
  • code generation
  • sometimes insane speed

Predictions with databases

This generated SQL can be put directly into a database, letting us “deploy” a model directly.

We will see this later

Why should I care to do this?

Ease of use

  • much simpler deployment method
  • possible way to avoid IT

Speed

  • 2 hour model now runs in 24 seconds,
  • millions of predictions per second,
  • 100s of millions of observations

News since first release!

classification support

The originally announced version of orbital only worked with regression. We now support more models, including classification.

Classification example

rec_spec <- recipe(species ~ sex + bill_length_mm + bill_depth_mm, data = penguins) |>
  step_unknown(all_nominal_predictors()) |>
  step_dummy(all_nominal_predictors()) |>
  step_impute_mean(all_numeric_predictors()) |>
  step_zv(all_predictors())

lr_spec <- boost_tree() |>
  set_mode("classification") |>
  set_engine("xgboost")

wf_spec <- workflow(rec_spec, lr_spec)
wf_fit <- fit(wf_spec, data = penguins)

Classification example

orbital_obj <- orbital(wf_fit)
orbital_obj
── orbital Object ──────────────────────────────────────────────────────────────────────────────────────────────────
• sex = dplyr::if_else(is.na(sex), "unknown", sex)
• sex_male = as.numeric(sex == "male")
• sex_unknown = as.numeric(sex == "unknown")
• bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 43.92193, bill_length_mm)
• bill_depth_mm = dplyr::if_else(is.na(bill_depth_mm), 17.15117, bill_depth_mm)
• sex_male = dplyr::if_else(is.na(sex_male), 0.4883721, sex_male)
• sex_unknown = dplyr::if_else(is.na(sex_unknown), 0.03197674, sex_unknown)
• Adelie = 0 + dplyr::case_when((bill_depth_mm < 15.1 | is.na(bill_depth_mm)) & (bill_length_mm < 42.35 | is. ...
• Chinstrap = 0 + dplyr::case_when((bill_length_mm < 42.35 | is.na(bill_length_mm)) & (bill_length_mm < 45.15 ...
• Gentoo = 0 + dplyr::case_when((bill_length_mm < 39.3 | is.na(bill_length_mm)) & (bill_depth_mm < 16.45 | is ...
• .pred_class = dplyr::case_when(Adelie > Chinstrap & Adelie > Gentoo ~ "Adelie", Chinstrap > Adelie & Chinst ...
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
11 equations in total.

Classification example

orbital_obj_prob <- orbital(wf_fit, type = c("class", "prob"))
orbital_obj_prob
── orbital Object ──────────────────────────────────────────────────────────────────────────────────────────────────
• sex = dplyr::if_else(is.na(sex), "unknown", sex)
• sex_male = as.numeric(sex == "male")
• sex_unknown = as.numeric(sex == "unknown")
• bill_length_mm = dplyr::if_else(is.na(bill_length_mm), 43.92193, bill_length_mm)
• bill_depth_mm = dplyr::if_else(is.na(bill_depth_mm), 17.15117, bill_depth_mm)
• sex_male = dplyr::if_else(is.na(sex_male), 0.4883721, sex_male)
• sex_unknown = dplyr::if_else(is.na(sex_unknown), 0.03197674, sex_unknown)
• Adelie = 0 + dplyr::case_when((bill_depth_mm < 15.1 | is.na(bill_depth_mm)) & (bill_length_mm < 42.35 | is. ...
• Chinstrap = 0 + dplyr::case_when((bill_length_mm < 42.35 | is.na(bill_length_mm)) & (bill_length_mm < 45.15 ...
• Gentoo = 0 + dplyr::case_when((bill_length_mm < 39.3 | is.na(bill_length_mm)) & (bill_depth_mm < 16.45 | is ...
• .pred_class = dplyr::case_when(Adelie > Chinstrap & Adelie > Gentoo ~ "Adelie", Chinstrap > Adelie & Chinst ...
• norm = exp(Adelie) + exp(Chinstrap) + exp(Gentoo)
• .pred_Adelie = exp(Adelie) / norm
• .pred_Chinstrap = exp(Chinstrap) / norm
• .pred_Gentoo = exp(Gentoo) / norm
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
15 equations in total.

Augment support

augment(orbital_obj, penguins)
# A tibble: 344 × 8
   .pred_class species island    bill_length_mm bill_depth_mm flipper_length_mm
   <chr>       <fct>   <fct>              <dbl>         <dbl>             <int>
 1 Adelie      Adelie  Torgersen           39.1          18.7               181
 2 Adelie      Adelie  Torgersen           39.5          17.4               186
 3 Adelie      Adelie  Torgersen           40.3          18                 195
 4 Adelie      Adelie  Torgersen           NA            NA                  NA
 5 Adelie      Adelie  Torgersen           36.7          19.3               193
 6 Adelie      Adelie  Torgersen           39.3          20.6               190
 7 Adelie      Adelie  Torgersen           38.9          17.8               181
 8 Adelie      Adelie  Torgersen           39.2          19.6               195
 9 Adelie      Adelie  Torgersen           34.1          18.1               193
10 Adelie      Adelie  Torgersen           42            20.2               190
# ℹ 334 more rows
# ℹ 2 more variables: body_mass_g <int>, sex <fct>

Augment support

con <- dbConnect(SQLite(), path = ":memory:")
new_penguins <- copy_to(con, penguins_test)
augment(orbital_obj, new_penguins)
# Source:   SQL [?? x 8]
# Database: sqlite 3.50.1 []
   .pred_class species island    bill_length_mm bill_depth_mm flipper_length_mm
   <chr>       <chr>   <chr>              <dbl>         <dbl>             <int>
 1 Adelie      Adelie  Torgersen           39.1          18.7               181
 2 Adelie      Adelie  Torgersen           39.5          17.4               186
 3 Adelie      Adelie  Torgersen           40.3          18                 195
 4 Adelie      Adelie  Torgersen           34.1          18.1               193
 5 Adelie      Adelie  Torgersen           37.8          17.3               180
 6 Adelie      Adelie  Torgersen           38.6          21.2               191
 7 Adelie      Adelie  Torgersen           38.7          19                 195
 8 Adelie      Adelie  Torgersen           34.4          18.4               184
 9 Adelie      Adelie  Biscoe              37.8          18.3               174
10 Adelie      Adelie  Biscoe              38.2          18.1               185
# ℹ more rows
# ℹ 2 more variables: body_mass_g <int>, sex <chr>

Python support

# Create a SciKit Learn Pipeline and Train it
pipeline = Pipeline([
    (
        "preprocess",
        ColumnTransformer(
            [("scaler", StandardScaler(with_std=False), COLUMNS)],
            remainder="passthrough",
        ),
    ),
    ("linear_regression", LinearRegression()),   
])
pipeline.fit(X_train, y_train)

# Convert it to an Orbital Pipeline
orbital_pipeline = orbital.parse_pipeline(
    pipeline,
    features={
        "sepal_length": orbital.types.DoubleColumnType(),
        "sepal_width": orbital.types.DoubleColumnType(),
        "petal_length": orbital.types.DoubleColumnType(),
        "petal_width": orbital.types.DoubleColumnType(),
    },
)

Python support


# Generate SQL
sql = orbital.export_sql("DATA_TABLE", orbital_pipeline, dialect="duckdb")
>>> print(sql)
SELECT ("t0"."sepal_length" - 5.809166666666666) * -0.11633479416518255 + 0.9916666666666668 +  
       ("t0"."sepal_width" - 3.0616666666666665) * -0.05977785171980231 + 
       ("t0"."petal_length" - 3.7266666666666666) * 0.25491374699772246 + 
       ("t0"."petal_width" - 1.1833333333333333) * 0.5475959809777828 
AS "variable" FROM "DATA_TABLE" AS "t0"

tailor support - dev

Tailor is the new post-processing package for tidymodels

The development version of orbital now works with tailor and most of its methods

Tree splitting - dev

This is a work in progress

.pred = dplyr::case_when(
  (Petal.Length < 2.45 | is.na(Petal.Length)) ~ -0.5555556,
                         Petal.Length >= 2.45 ~ 0.5769231
) +
dplyr::case_when(
  (Petal.Length < 2.45 | is.na(Petal.Length)) ~ -0.4346052,
                         Petal.Length >= 2.45 ~ 0.4489912
) +
dplyr::case_when(
  (Petal.Length < 2.45 | is.na(Petal.Length)) ~ -0.3736209,
                         Petal.Length >= 2.45 ~ 0.3875872
)

Tree splitting - dev

This can let databases take advantage of parallelism for further speed increases

tree_1 = dplyr::case_when(
  (Petal.Length < 2.45 | is.na(Petal.Length)) ~ -0.5555556,
                         Petal.Length >= 2.45 ~ 0.5769231
),
tree_2 = dplyr::case_when(
  (Petal.Length < 2.45 | is.na(Petal.Length)) ~ -0.4346052,
                         Petal.Length >= 2.45 ~ 0.4489912
),
tree_3 = dplyr::case_when(
  (Petal.Length < 2.45 | is.na(Petal.Length)) ~ -0.3736209,
                         Petal.Length >= 2.45 ~ 0.3875872
),
.pred = tree_1 + tree_2 + tree_3

Demo Time