-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpopulate_code.py
More file actions
135 lines (108 loc) · 4.13 KB
/
populate_code.py
File metadata and controls
135 lines (108 loc) · 4.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
print("🚀 Populating files with Machine Learning and API code...")

# Mapping of project-relative output path -> full source text to write there.
# Every value is a complete, standalone Python module:
#   * src/features.py - feature engineering shared by training and serving
#   * src/train.py    - trains the XGBoost model on mock data and saves it
#   * main.py         - FastAPI app that loads the saved model and scores
# The surrounding blank lines inside each literal are stripped by the writer.
files = {
    "src/features.py": """
import pandas as pd
import numpy as np


def add_features(df: pd.DataFrame) -> pd.DataFrame:
    '''Adds behavior and velocity features for fraud detection.'''
    df = df.copy()

    # Handle missing or zero values to avoid errors
    df["prev_24h_tx_count_card"] = df.get("prev_24h_tx_count_card", 0)
    df["prev_24h_amt_card"] = df.get("prev_24h_amt_card", 0)
    df["velocity_amt_1h"] = df.get("velocity_amt_1h", 0)
    df["amount"] = df.get("amount", 0)

    df["log_amount"] = np.log1p(df["amount"])
    df["avg_tx_amt_24h"] = (df["prev_24h_amt_card"] / (df["prev_24h_tx_count_card"] + 1e-3))
    df["velocity_ratio"] = df["velocity_amt_1h"] / (df["avg_tx_amt_24h"] + 1e-3)
    return df
""",
    "src/train.py": """
import pandas as pd
import numpy as np
import os
import joblib
from xgboost import XGBClassifier
from sklearn.metrics import average_precision_score, confusion_matrix
from features import add_features

print("🚀 Starting Model Training Process...")

# 1. Mock Data Generation
np.random.seed(42)
num_samples = 2000
df = pd.DataFrame({
    "amount": np.random.exponential(50, num_samples),
    "prev_24h_amt_card": np.random.uniform(0, 500, num_samples),
    "prev_24h_tx_count_card": np.random.randint(0, 10, num_samples),
    "velocity_amt_1h": np.random.uniform(0, 200, num_samples),
    "prev_1h_tx_count_card": np.random.randint(0, 5, num_samples),
    "dayofweek": np.random.randint(0, 7, num_samples),
    "is_fraud": np.random.choice([0, 1], num_samples, p=[0.95, 0.05])
})

# 2. Feature Engineering
df = add_features(df)

# 3. Train-Validation Split
cut = int(len(df) * 0.8)
train, valid = df.iloc[:cut], df.iloc[cut:]
X_train, y_train = train.drop(columns=["is_fraud"]), train["is_fraud"]
X_valid, y_valid = valid.drop(columns=["is_fraud"]), valid["is_fraud"]

# 4. Train Model
print("🧠 Training XGBoost Model...")
pos_weight = (len(y_train) - y_train.sum()) / max(1, y_train.sum())
model = XGBClassifier(n_estimators=100, max_depth=4, scale_pos_weight=pos_weight, random_state=42)
model.fit(X_train, y_train)

# 5. Evaluate
preds = model.predict_proba(X_valid)[:, 1]
print(f"✅ PR-AUC Score: {average_precision_score(y_valid, preds):.4f}")

# 6. Save Model to the 'models' directory
# The directory is resolved from this file's location, not the current
# working directory, so the API finds the model however training is launched.
models_dir = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models")
)
os.makedirs(models_dir, exist_ok=True)
joblib.dump({"model": model, "threshold": 0.5}, os.path.join(models_dir, "fraud_xgb.joblib"))
print("🎉 Model saved successfully to models/fraud_xgb.joblib!")
""",
    "main.py": """
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import pandas as pd
import sys
import os

# Add 'src' to path so we can import features.py
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
from features import add_features

app = FastAPI(title="Real-Time Fraud Detection API")

# Load Model safely
# Resolved next to this file so the server can be started from any directory.
model_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "models", "fraud_xgb.joblib"
)
if os.path.exists(model_path):
    bundle = joblib.load(model_path)
    model = bundle["model"]
    THRESHOLD = bundle["threshold"]
else:
    model = None


class Transaction(BaseModel):
    amount: float
    prev_24h_tx_count_card: float
    prev_24h_amt_card: float
    velocity_amt_1h: float
    prev_1h_tx_count_card: float
    dayofweek: int


@app.get("/")
def home():
    return {"message": "Fraud Detection API is running. Go to /docs to test it."}


@app.post("/score")
def score_transaction(tx: Transaction):
    if not model:
        return {"error": "Model not found. Please run 'python src/train.py' first."}

    df = pd.DataFrame([tx.model_dump()])
    df = add_features(df)

    # XGBoost checks column names and their order against the training data,
    # so present the columns exactly as the booster saw them.
    expected_columns = model.get_booster().feature_names
    if expected_columns:
        df = df[expected_columns]

    # predict_proba returns a numpy scalar; cast to a builtin float so the
    # response can be JSON-encoded.
    prob = float(model.predict_proba(df)[:, 1][0])
    decision = "REVIEW (FRAUD ALERT)" if prob >= THRESHOLD else "ALLOW"

    return {
        "probability": round(prob, 4),
        "decision": decision
    }
"""
}
# Write every embedded source to its target path.
for filepath, content in files.items():
    # Targets such as "src/features.py" live in sub-directories that may not
    # exist yet; create them first so open() does not raise FileNotFoundError.
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filepath, "w", encoding="utf-8") as f:
        # Drop the padding around the triple-quoted literal and finish the
        # file with exactly one trailing newline.
        f.write(content.strip() + "\n")
    print(f"📄 Successfully wrote code to: {filepath}")

print("✅ All files are populated and ready to run!")