# Card-Fraud-Detection/populate_code.py at main · dalimkumar452-sudo/Card-Fraud-Detection · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os

print("🚀 Populating files with Machine Learning and API code...")

# Maps each destination path (relative to the project root) to the source code
# that will be written there. The embedded programs are:
#   * src/features.py - feature engineering shared by training and serving
#   * src/train.py    - trains an XGBoost model on mock data and saves it
#   * main.py         - FastAPI service that scores single transactions
# NOTE: the embedded sources must use ''' for their own docstrings and must
# not contain backslashes, because they live inside regular """ literals.
files = {
    "src/features.py": """
import pandas as pd
import numpy as np

def add_features(df: pd.DataFrame) -> pd.DataFrame:
    '''Adds behavior and velocity features for fraud detection.'''
    df = df.copy()

    # Handle missing or zero values to avoid errors
    df["prev_24h_tx_count_card"] = df.get("prev_24h_tx_count_card", 0)
    df["prev_24h_amt_card"] = df.get("prev_24h_amt_card", 0)
    df["velocity_amt_1h"] = df.get("velocity_amt_1h", 0)
    df["amount"] = df.get("amount", 0)

    df["log_amount"] = np.log1p(df["amount"])
    df["avg_tx_amt_24h"] = (df["prev_24h_amt_card"] / (df["prev_24h_tx_count_card"] + 1e-3))
    df["velocity_ratio"] = df["velocity_amt_1h"] / (df["avg_tx_amt_24h"] + 1e-3)

    return df
""",

    "src/train.py": """
import pandas as pd
import numpy as np
import os
import joblib
from xgboost import XGBClassifier
from sklearn.metrics import average_precision_score, confusion_matrix
from features import add_features

print("🚀 Starting Model Training Process...")

# 1. Mock Data Generation
np.random.seed(42)
num_samples = 2000
df = pd.DataFrame({
    "amount": np.random.exponential(50, num_samples),
    "prev_24h_amt_card": np.random.uniform(0, 500, num_samples),
    "prev_24h_tx_count_card": np.random.randint(0, 10, num_samples),
    "velocity_amt_1h": np.random.uniform(0, 200, num_samples),
    "prev_1h_tx_count_card": np.random.randint(0, 5, num_samples),
    "dayofweek": np.random.randint(0, 7, num_samples),
    "is_fraud": np.random.choice([0, 1], num_samples, p=[0.95, 0.05])
})

# 2. Feature Engineering
df = add_features(df)

# 3. Train-Validation Split
cut = int(len(df) * 0.8)
train, valid = df.iloc[:cut], df.iloc[cut:]

X_train, y_train = train.drop(columns=["is_fraud"]), train["is_fraud"]
X_valid, y_valid = valid.drop(columns=["is_fraud"]), valid["is_fraud"]

# 4. Train Model
print("🧠 Training XGBoost Model...")
pos_weight = (len(y_train) - y_train.sum()) / max(1, y_train.sum())
model = XGBClassifier(n_estimators=100, max_depth=4, scale_pos_weight=pos_weight, random_state=42)
model.fit(X_train, y_train)

# 5. Evaluate
preds = model.predict_proba(X_valid)[:, 1]
print(f"✅ PR-AUC Score: {average_precision_score(y_valid, preds):.4f}")

# 6. Save Model to the project-level 'models' directory.
# The path is resolved from this file's location (not the current working
# directory) so main.py finds the model wherever training was launched from.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
models_dir = os.path.join(project_root, "models")
os.makedirs(models_dir, exist_ok=True)
# The training column order is stored so the API can feed features to the
# model in exactly the same order.
joblib.dump(
    {"model": model, "threshold": 0.5, "features": list(X_train.columns)},
    os.path.join(models_dir, "fraud_xgb.joblib"),
)
print("🎉 Model saved successfully to models/fraud_xgb.joblib!")
""",

    "main.py": """
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import pandas as pd
import sys
import os

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Add 'src' to path so we can import features.py
sys.path.append(os.path.join(BASE_DIR, 'src'))
from features import add_features

app = FastAPI(title="Real-Time Fraud Detection API")

# Load Model safely. Defaults are defined up front so every name exists even
# when no model has been trained yet.
# NOTE: joblib.load unpickles the file - only load model files you trust.
model_path = os.path.join(BASE_DIR, "models", "fraud_xgb.joblib")
model = None
THRESHOLD = 0.5
FEATURES = None
if os.path.exists(model_path):
    bundle = joblib.load(model_path)
    model = bundle["model"]
    THRESHOLD = bundle.get("threshold", THRESHOLD)
    # Bundles saved by older versions of train.py have no "features" entry;
    # fall back to the names recorded by the booster itself.
    FEATURES = bundle.get("features") or model.get_booster().feature_names

class Transaction(BaseModel):
    amount: float
    prev_24h_tx_count_card: float
    prev_24h_amt_card: float
    velocity_amt_1h: float
    prev_1h_tx_count_card: float
    dayofweek: int

@app.get("/")
def home():
    return {"message": "Fraud Detection API is running. Go to /docs to test it."}

@app.post("/score")
def score_transaction(tx: Transaction):
    if model is None:
        return {"error": "Model not found. Please run 'python src/train.py' first."}

    df = pd.DataFrame([tx.model_dump()])
    df = add_features(df)
    # XGBoost validates feature names and their order against training, so
    # present the columns exactly as the model saw them.
    if FEATURES:
        df = df[FEATURES]

    # Cast to a built-in float: numpy.float32 is not JSON serializable.
    prob = float(model.predict_proba(df)[:, 1][0])
    decision = "REVIEW (FRAUD ALERT)" if prob >= THRESHOLD else "ALLOW"

    return {
        "probability": round(prob, 4),
        "decision": decision
    }
"""
}

# Write every generated source file to disk. Parent directories (e.g. "src/")
# are created first so the script also works in a fresh, empty project folder
# instead of failing with FileNotFoundError.
for filepath, content in files.items():
    parent_dir = os.path.dirname(filepath)
    if parent_dir:  # top-level files such as "main.py" have no parent to create
        os.makedirs(parent_dir, exist_ok=True)
    with open(filepath, "w", encoding="utf-8") as f:
        # Drop the padding newlines of the triple-quoted literal, but end the
        # file with a single newline as text files conventionally do.
        f.write(content.strip() + "\n")
    print(f"📄 Successfully wrote code to: {filepath}")

print("✅ All files are populated and ready to run!")