1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
|
library(tidymodels)
#> ── Attaching packages ──────────────────────────────────── tidymodels 0.1.0 ──
#> ✓ broom 0.5.6 ✓ recipes 0.1.12
#> ✓ dials 0.0.6 ✓ rsample 0.0.6
#> ✓ dplyr 0.8.5 ✓ tibble 3.0.1
#> ✓ ggplot2 3.3.0 ✓ tune 0.1.0
#> ✓ infer 0.5.1 ✓ workflows 0.1.1
#> ✓ parsnip 0.1.1 ✓ yardstick 0.0.6
#> ✓ purrr 0.3.4
#> ── Conflicts ─────────────────────────────────────── tidymodels_conflicts() ──
#> x purrr::accumulate() masks foreach::accumulate()
#> x purrr::discard() masks scales::discard()
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
#> x ggplot2::margin() masks dials::margin()
#> x recipes::step() masks stats::step()
#> x purrr::when() masks foreach::when()
library(rules)
# Load the car price data set that ships with the modeldata package.
data(car_prices, package = "modeldata")

# Seed the RNG immediately before splitting so the train/test partition can be
# reproduced from run to run.
set.seed(9932)
car_split <- initial_split(car_prices)
car_tr <- training(car_split)
car_te <- testing(car_split)

# A single rule set:
# Model specification only (nothing is estimated yet): Cubist rules with
# `neighbors = 7`, run through the "Cubist" engine. No committee count is
# supplied here; the engine call echoed by summary() shows `committees = 1`.
cubist_mod <-
  cubist_rules(neighbors = 7) %>%
  set_engine("Cubist")

# Fit on the training rows. The outcome is log10(Price); `.` on the right-hand
# side uses every remaining column of car_tr as a predictor.
cubist_fit <-
  cubist_mod %>%
  fit(log10(Price) ~ ., data = car_tr)

# `$fit` is the underlying engine object; summarising it prints the rules,
# the training-set error and the attribute usage table.
summary(cubist_fit$fit)
#>
#> Call:
#> cubist.default(x = x, y = y, committees = 1)
#>
#>
#> Cubist [Release 2.07 GPL Edition] Wed May 20 21:39:22 2020
#> ---------------------------------
#>
#> Target attribute `outcome'
#>
#> Read 603 cases (18 attributes) from undefined.data
#>
#> Model:
#>
#> Rule 1: [210 cases, mean 4.116360, range 3.94295 to 4.2505, est err 0.030756]
#>
#> if
#> Cylinder <= 4
#> Saab <= 0
#> then
#> outcome = 4.115185 + 0.12 Saab - 3.5e-06 Mileage + 0.017 Cylinder
#> - 0.087 hatchback - 0.029 Chevy + 0.046 wagon + 0.028 Leather
#> + 0.041 Cadillac - 0.024 sedan + 0.027 convertible
#> + 0.006 Doors + 0.012 Buick
#>
#> Rule 2: [8 cases, mean 4.207121, range 4.13308 to 4.26696, est err 0.006589]
#>
#> if
#> Cylinder > 4
#> Saturn > 0
#> then
#> outcome = 3.88624 + 0.057 Cylinder + 0.2 Saab + 0.141 Cadillac
#> - 3.8e-06 Mileage - 0.054 sedan + 0.094 convertible
#> - 0.085 hatchback + 0.019 Doors + 0.04 Buick + 0.014 Cruise
#> + 0.01 Leather + 0.007 Sound + 0.007 Saturn
#>
#> Rule 3: [33 cases, mean 4.229076, range 4.16741 to 4.29184, est err 0.012903]
#>
#> if
#> Cylinder > 4
#> Cruise <= 0
#> then
#> outcome = 4.265627 - 3.7e-06 Mileage + 0.039 Chevy
#>
#> Rule 4: [94 cases, mean 4.272727, range 4.18913 to 4.4427, est err 0.034717]
#>
#> if
#> Mileage > 3946
#> Cylinder > 4
#> Doors > 2
#> Cruise > 0
#> Buick <= 0
#> Cadillac <= 0
#> Saturn <= 0
#> then
#> outcome = 4.037203 + 0.051 Cylinder - 4.3e-06 Mileage + 0.061 Saab
#> + 0.044 Cadillac - 0.016 sedan + 0.029 convertible
#> - 0.026 hatchback + 0.006 Doors - 0.009 Chevy + 0.012 Buick
#> + 0.004 Cruise
#>
#> Rule 5: [57 cases, mean 4.314541, range 4.17208 to 4.42864, est err 0.049758]
#>
#> if
#> Buick > 0
#> then
#> outcome = 4.389884 - 3e-06 Mileage
#>
#> Rule 6: [9 cases, mean 4.341528, range 4.23957 to 4.66962, est err 0.036309]
#>
#> if
#> Mileage <= 3946
#> Cylinder > 4
#> Cadillac <= 0
#> then
#> outcome = 3.439093 + 5.28e-05 Mileage + 0.129 Cylinder
#>
#> Rule 7: [43 cases, mean 4.354487, range 4.1778 to 4.60071, est err 0.031792]
#>
#> if
#> Cylinder > 4
#> Doors <= 2
#> Cruise > 0
#> convertible <= 0
#> then
#> outcome = 3.40984 + 0.13 Cylinder + 0.116 Chevy - 2.7e-06 Mileage
#> + 0.037 Sound + 0.031 Leather
#>
#> Rule 8: [85 cases, mean 4.462877, range 4.34723 to 4.58348, est err 0.023398]
#>
#> if
#> Saab > 0
#> then
#> outcome = 4.522928 - 3.4e-06 Mileage + 0.064 Saab - 0.021 Doors
#> - 0.035 sedan + 0.009 Cylinder + 0.022 Cadillac
#> - 0.024 hatchback + 0.015 convertible - 0.004 Chevy
#> + 0.006 Buick
#>
#> Rule 9: [60 cases, mean 4.592824, range 4.44778 to 4.84976, est err 0.041948]
#>
#> if
#> Cadillac > 0
#> then
#> outcome = 4.774347 - 0.103 Doors + 0.036 Cylinder - 3.4e-06 Mileage
#>
#> Rule 10: [7 cases, mean 4.625017, range 4.58911 to 4.6727, est err 0.006627]
#>
#> if
#> Cylinder > 4
#> Cadillac <= 0
#> convertible > 0
#> then
#> outcome = 4.693132 - 3.9e-06 Mileage
#>
#>
#> Evaluation on training data (603 cases):
#>
#> Average |error| 0.032526
#> Relative |error| 0.23
#> Correlation coefficient 0.97
#>
#>
#> Attribute usage:
#> Conds Model
#>
#> 67% 84% Cylinder
#> 49% 66% Saab
#> 28% 66% Cadillac
#> 28% 17% Cruise
#> 25% 66% Buick
#> 23% 75% Doors
#> 17% 100% Mileage
#> 17% 1% Saturn
#> 8% 66% convertible
#> 77% Chevy
#> 66% hatchback
#> 66% sedan
#> 43% Leather
#> 35% wagon
#> 8% Sound
#>
#>
#> Time: 0.0 secs
# Predict for the held-out rows. Price is dropped so that only predictor
# columns are passed in; the model was fit to log10(Price), so `.pred` is on
# that log10 scale.
predict(cubist_fit, new_data = select(car_te, -Price))
#> # A tibble: 201 x 1
#> .pred
#> <dbl>
#> 1 4.32
#> 2 4.49
#> 3 4.54
#> 4 4.54
#> 5 4.43
#> 6 4.43
#> 7 4.46
#> 8 4.44
#> 9 4.37
#> 10 4.48
#> # … with 191 more rows
|