Design-Optimization-Workflow/Function_BO_3.py at main · Jinge91/Design-Optimization-Workflow · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
import numpy as np
import matplotlib.pyplot as plt
from skopt.learning import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel
from skopt.acquisition import gaussian_ei
from scipy.optimize import minimize
import pandas as pd

def runBO(df, input_cols, output_col, bounds=None):

    X_in = df[input_cols].values
    Y_out = df[output_col].values

    # 建议加上 log，因为熵产通常不能为负，且变化范围大
    # 如果数据里有负数，请注释掉这一行
    Y_out = np.log(Y_out)

    # =======================================================
    # 🛠️ 2. 边界处理逻辑
    # =======================================================
    if bounds is None:
        print("⚠️ 未提供边界，正在根据当前数据自动生成...")
        # 自动获取每一列的 min-10% 和 max+10%
        bounds = [ (df[col].min()*0.9, df[col].max()*1.1) for col in input_cols ]

    if len(bounds) != len(input_cols):
        raise ValueError(f"错误：变量数 {len(input_cols)} 与边界数 {len(bounds)} 不一致")

    print(f"Running BO with inputs: {input_cols}")
    print(f"Search Bounds: {bounds}")

    # 拟合 GP 模型
    kernel = Matern(nu=2.5)
    gpr_model = GaussianProcessRegressor(kernel=kernel,
                                         n_restarts_optimizer=20,
                                         alpha=1e-3,
                                         normalize_y=True,
                                         random_state=0)
    gpr_model.fit(X_in, Y_out)
    y_opt = np.min(Y_out) # 当前观测到的最小值

    next_point = []
    exploring_intensitvity = 0.001    # small make EI to focus on exploitation
    # ================= CASE A: 2D 可视化模式 =================
    if len(input_cols) == 2:
        print(">>> 2D 模式：正在绘图并寻找连续最优解...")

        # ---------------------------------------------------
        # 第一部分：生成网格（仅用于画图，分辨率可以调到 100 让图更平滑）
        # ---------------------------------------------------
        res = 100
        x1 = np.linspace(bounds[0][0], bounds[0][1], res)
        x2 = np.linspace(bounds[1][0], bounds[1][1], res)
        X1, X2 = np.meshgrid(x1, x2)
        X_grid = np.vstack((X1.ravel(), X2.ravel())).T

        # 计算网格点上的预测值和 EI，用于后续渲染热图
        pred_mean, pred_std = gpr_model.predict(X_grid, return_std=True)
        ei_values = gaussian_ei(X=X_grid, model=gpr_model, y_opt=y_opt, xi=exploring_intensitvity)

        # ---------------------------------------------------
        # 第二部分：【核心修复】使用连续优化器寻找真实 EI 最大值
        # ---------------------------------------------------

        # ========================== gradient optimizer L-BFGS-B maximize EI ==========================
        def min_func(x):
            return -gaussian_ei(X=x.reshape(1, -1), model=gpr_model, y_opt=y_opt, xi=exploring_intensitvity)[0]

        # 1. 瞬间生成海量的随机候选点 (比如 2000 个)
        # 相比于生成规则网格，随机撒点完全不惧怕维度增加
        num_random_samples = 5000
        random_X = np.random.uniform(
            low=[b[0] for b in bounds],
            high=[b[1] for b in bounds],
            size=(num_random_samples, len(bounds))
        )

        # 2. 批量计算这 2000 个点的 EI 值
        # 这一步纯粹是矩阵运算，完全不需要求导数，速度极快！
        random_ei_values = gaussian_ei(X=random_X, model=gpr_model, y_opt=y_opt, xi=exploring_intensitvity)

        # 3. 找出 EI 值排名前 10 的点，作为优化器的“精英起跑线”
        # np.argsort 会从小到大排序，我们取最后 10 个索引
        top_10_indices = np.argsort(random_ei_values)[-10:]
        elite_starting_points = random_X[top_10_indices]

        best_ei_val = np.inf
        next_point = None

        # 4. 只对这 10 个“最有潜力”的点启动 L-BFGS-B 进行精确爬坡
        for x0 in elite_starting_points:
            res_opt = minimize(min_func, x0=x0, bounds=bounds, method='L-BFGS-B')
            if res_opt.fun < best_ei_val:
                best_ei_val = res_opt.fun
                next_point = res_opt.x


        # ===================== Non-gradient optimizer maximize EI ======================
        # from scipy.optimize import differential_evolution

        # def min_func(x):
        #     return -gaussian_ei(X=x.reshape(1, -1), model=gpr_model, y_opt=y_opt, xi=exploring_intensitvity)[0]

        # print("🔍 正在使用差分进化算法(全局优化)寻找最大 EI...")

        # # 直接把边界丢给 differential_evolution，它自己会在里面繁衍、变异、寻找全局最优
        # res_opt = differential_evolution(min_func, bounds=bounds, popsize=20, maxiter=100)

        # best_x = res_opt.x
        # next_point = res_opt.x


        best_x = next_point # 将找出的真实连续极值点赋给 best_x，供画图使用
        # 5. predict next Obj value
        pred_val = gpr_model.predict(np.array([best_x]))


        # ==========================================
        # 🎨 【核心修复】这里是画图代码
        # ==========================================
        fig = plt.figure(figsize=(16, 14))

        # --------------------------------------------------
        # 🟢 第一排左 (221): GP 3D 预测曲面
        # --------------------------------------------------
        ax1 = fig.add_subplot(221, projection='3d')
        surf1 = ax1.plot_surface(X1, X2, pred_mean.reshape(X1.shape), cmap='viridis', alpha=0.8, edgecolor='none')
        ax1.scatter(X_in[:,0], X_in[:,1], Y_out, c='red', s=50, edgecolors='k', label='Observed Data', zorder=10)

        ax1.set_xlim(bounds[0][0], bounds[0][1])
        ax1.set_ylim(bounds[1][0], bounds[1][1])
        # ax1.set_zlim(0, 1.5) # 限制 Z 轴高度，防止被 6.97 的离群点带偏

        ax1.set_title(f'3D GP Prediction (Mean)\nTarget: {output_col}')
        ax1.set_xlabel(input_cols[0])
        ax1.set_ylabel(input_cols[1])
        fig.colorbar(surf1, ax=ax1, shrink=0.5, aspect=10)

        # --------------------------------------------------
        # 🟢 第一排右 (222): EI 3D 采集函数
        # --------------------------------------------------
        ax2 = fig.add_subplot(222, projection='3d')
        surf2 = ax2.plot_surface(X1, X2, ei_values.reshape(X1.shape), cmap='plasma', alpha=0.8, edgecolor='none')
        ax2.scatter(best_x[0], best_x[1], np.max(ei_values), c='lime', s=200, marker='*', edgecolors='k', label='Next Point', zorder=10)

        ax2.set_xlim(bounds[0][0], bounds[0][1])
        ax2.set_ylim(bounds[1][0], bounds[1][1])
        ax2.set_title('3D Acquisition Function (EI)')
        ax2.set_xlabel(input_cols[0])
        ax2.set_ylabel(input_cols[1])
        ax2.legend()

        # --------------------------------------------------
        # 🔵 第二排左 (223): GP 2D 热图
        # --------------------------------------------------
        ax3 = fig.add_subplot(223)
        c3 = ax3.contourf(X1, X2, pred_mean.reshape(X1.shape), levels=50, cmap='viridis')
        ax3.scatter(X_in[:,0], X_in[:,1], c='red', s=60, edgecolors='k', label='Observed Data', zorder=5)

        ax3.set_xlim(bounds[0][0], bounds[0][1])
        ax3.set_ylim(bounds[1][0], bounds[1][1])
        ax3.set_title('2D Heatmap: GP Prediction')
        ax3.set_xlabel(input_cols[0])
        ax3.set_ylabel(input_cols[1])
        fig.colorbar(c3, ax=ax3)

        # --------------------------------------------------
        # 🔵 第二排右 (224): EI 2D 热图
        # --------------------------------------------------
        ax4 = fig.add_subplot(224)
        c4 = ax4.contourf(X1, X2, ei_values.reshape(X1.shape), levels=50, cmap='plasma')
        ax4.scatter(best_x[0], best_x[1], c='lime', s=250, marker='*', edgecolors='k', label='Next Point', zorder=5)

        ax4.set_xlim(bounds[0][0], bounds[0][1])
        ax4.set_ylim(bounds[1][0], bounds[1][1])
        ax4.set_title('2D Heatmap: Acquisition Function (EI)')
        ax4.set_xlabel(input_cols[0])
        ax4.set_ylabel(input_cols[1])
        fig.colorbar(c4, ax=ax4)

        # 计算区间的跨度 (Max - Min)
        range_x = bounds[0][1] - bounds[0][0]
        range_y = bounds[1][1] - bounds[1][0]

        # 设定留白比例，比如 5% (0.05)
        margin = 0.05

        # 自动计算出的留白值
        pad_x = range_x * margin
        pad_y = range_y * margin

        # 统一应用到四个图上
        for ax in [ax1, ax2, ax3, ax4]:
            ax.set_xlim(bounds[0][0] - pad_x, bounds[0][1] + pad_x)
            ax.set_ylim(bounds[1][0] - pad_y, bounds[1][1] + pad_y)

        # plt.tight_layout()
        # plt.show()

    # ================= CASE B: 高维数值优化模式 =================
    else:

    # ========================== gradient optimizer L-BFGS-B maximize EI ==========================
        def min_func(x):
            return -gaussian_ei(X=x.reshape(1, -1), model=gpr_model, y_opt=y_opt, xi=exploring_intensitvity)[0]

        # 1. 瞬间生成海量的随机候选点 (比如 2000 个)
        # 相比于生成规则网格，随机撒点完全不惧怕维度增加
        num_random_samples = 2000
        random_X = np.random.uniform(
            low=[b[0] for b in bounds],
            high=[b[1] for b in bounds],
            size=(num_random_samples, len(bounds))
        )

        # 2. 批量计算这 2000 个点的 EI 值
        # 这一步纯粹是矩阵运算，完全不需要求导数，速度极快！
        random_ei_values = gaussian_ei(X=random_X, model=gpr_model, y_opt=y_opt, xi = exploring_intensitvity)

        # 3. 找出 EI 值排名前 10 的点，作为优化器的“精英起跑线”
        # np.argsort 会从小到大排序，我们取最后 10 个索引
        top_10_indices = np.argsort(random_ei_values)[-10:]
        elite_starting_points = random_X[top_10_indices]

        best_ei_val = np.inf
        next_point = None

        # 4. 只对这 10 个“最有潜力”的点启动 L-BFGS-B 进行精确爬坡
        for x0 in elite_starting_points:
            res_opt = minimize(min_func, x0=x0, bounds=bounds, method='L-BFGS-B')
            if res_opt.fun < best_ei_val:
                best_ei_val = res_opt.fun
                next_point = res_opt.x

 # ===================== Non-gradient optimizer maximize EI ======================
        # from scipy.optimize import differential_evolution

        # # def min_func(x):
        # #     return -gaussian_ei(X=x.reshape(1, -1), model=gpr_model, y_opt=y_opt, xi=exploring_intensitvity)[0]

        # # print("🔍 正在使用差分进化算法(全局优化)寻找最大 EI...")

        # # # 直接把边界丢给 differential_evolution，它自己会在里面繁衍、变异、寻找全局最优
        # # res_opt = differential_evolution(min_func, bounds=bounds, popsize=20, maxiter=100)

        # best_x = res_opt.x
        # next_point = res_opt.x

    # 返回结果
    next_point_dict = {col: val for col, val in zip(input_cols, next_point)}
    pred_val = gpr_model.predict(np.array([best_x]))
    next_point_dict["pred_obj"] = pred_val[0]
    next_point_dict["pred_obj"] = np.exp(pred_val[0])
    return next_point_dict


if __name__ =="__main__":
    # 您的调试代码保持不变
    # num_samples = 10
    # width_data = np.round(np.random.uniform(0.2, 2.0, num_samples), 4)
    # flow_data  = np.round(np.random.uniform(0.1, 1.0, num_samples), 4)
    # entropy_data = np.round(np.random.uniform(0.5, 100.0, num_samples), 4)

    # df_debug = pd.DataFrame({
    #                         "Channel_Width": width_data,
    #                         "Flow_Rate": flow_data,
    #                         "Entropy": entropy_data
    #                                                 })

    # input_cols = ["Channel_Width", "Flow_Rate"]
    # output_col = "Entropy"
    # debug_bounds = [(0.2, 2.0), (0.1, 1.0)]

    # print("🚀 开始测试 runBO ...")
    # next_point = runBO(df_debug, input_cols, output_col, bounds=debug_bounds)
    # print("推荐点:", next_point)


    import pandas as pd

    df = pd.read_csv(r'C:\Users\lichengd\OneDrive - Oregon State University\1. PhD\Chiller\automation\Chiller_auto_Python\ALL_complete_code_here\CFD_Final_Result_run10_fixiter50.csv',encoding='gbk')

    # 1. 定义你需要的列名列表
    input_cols = ['width', 'flowrate']
    output_cols = 'entropy'
    all_cols = input_cols+ [output_cols]

    top_10 = df[all_cols].head(50)

    # 2. 仅筛选这些列，并转换为字典
    # df[target_columns] 会创建一个只包含这些列的新 DataFrame
    # data_dict = df[target_columns].to_dict(orient='records')

    print(top_10)

    debug_bounds = [(0.2, 2.0), (0.001, 1.0)]

    print("🚀 开始测试 runBO ...")
    next_point = runBO(top_10, input_cols, output_cols, bounds=debug_bounds)
    print(next_point)