diff --git a/Dockerfile b/Dockerfile
index 18e42f4..2965c0b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3
+FROM python:3.8
 
 WORKDIR /usr/src/app
 
diff --git a/apis/models/house.py b/apis/models/house.py
index e4661e8..17eda2c 100644
--- a/apis/models/house.py
+++ b/apis/models/house.py
@@ -18,8 +18,7 @@ class BostonHouseRequestModel(BaseModel):
         9. RAD index of accessibility to radial highways
         10. TAX full-value property-tax rate per $10,000
         11. PTRATIO pupil-teacher ratio by town
-        12. B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks
-            by town
+        12. B 1000(Bk - 0.63)^2 Biased variable
         13. LSTAT % lower status of the population
     """
 
@@ -70,8 +69,7 @@ class BostonHouseRequestModel(BaseModel):
     )
     discriminateProportion: float = Field(
         example=396.30,
-        description="1000(Bk - 0.63)^2 where Bk is the proportion \
-        of colored by town",
+        description="Discriminate proportion",
     )
     percentLowerStatPopulation: float = Field(
         example=4.30,
diff --git a/apis/models/masklang.py b/apis/models/masklang.py
new file mode 100644
index 0000000..b8e9055
--- /dev/null
+++ b/apis/models/masklang.py
@@ -0,0 +1,14 @@
+from pydantic import BaseModel, Field
+
+
+class MaskLanguageModelRequestModel(BaseModel):
+    predictionId: str = "f75ef3b8-f414-422c-87b1-1e21e684661c"
+    text: str = "Am I a [MASK] person if I save my green planet?"
+
+
+class MaskLanguageModelResponseModel(BaseModel):
+    score: float = 0.1
+    token: int = 100
+    token_str: str = "token string"
+    orginal_sequence: str = "Sequence with [MASK]"
+    sequence: str = "Sequence with [MASK] replaced with token word"
diff --git a/apis/v1/boston.py b/apis/v1/boston.py
index 967f624..fc3a100 100644
--- a/apis/v1/boston.py
+++ b/apis/v1/boston.py
@@ -7,15 +7,13 @@
 
 router = APIRouter(prefix="/boston")
 # Load trained model. Dummy model being trained on startup...
-logger.info("Training/Loading iris classification model")
+logger.info("Training/Loading boston linear reg model")
 trainer = BostonHousePriceTrainerInstance()
 boston_model = trainer.train()
 logger.info("Training completed")
 
 
-@router.post(
-    "/trainModel", tags=["boston"], response_model=TrainingStatusResponse
-)
+@router.post("/trainModel", tags=["boston"], response_model=TrainingStatusResponse)
 async def boston_train():
     training_id = uuid.uuid1()
     # Queue training / start training via RabbitMQ, Queue, etc..
@@ -27,9 +25,7 @@ async def boston_train():
     }
 
 
-@router.post(
-    "/predictPrice", tags=["boston"], response_model=BostonHouseResponseModel
-)
+@router.post("/predictPrice", tags=["boston"], response_model=BostonHouseResponseModel)
 async def boston_price_prediction(body: BostonHouseRequestModel):
     request = body.dict()
     payload = [x for x in request.values()]
diff --git a/apis/v1/iris.py b/apis/v1/iris.py
index d1831f0..7b69217 100644
--- a/apis/v1/iris.py
+++ b/apis/v1/iris.py
@@ -16,9 +16,7 @@
 logger.info("Training completed")
 
 
-@router.post(
-    "/trainModel", tags=["iris"], response_model=TrainingStatusResponse
-)
+@router.post("/trainModel", tags=["iris"], response_model=TrainingStatusResponse)
 async def iris_train():
     training_id = uuid.uuid1()
     # Queue training / start training via RabbitMQ, Queue, etc..
diff --git a/apis/v1/masklang.py b/apis/v1/masklang.py
new file mode 100644
index 0000000..5bfbd01
--- /dev/null
+++ b/apis/v1/masklang.py
@@ -0,0 +1,39 @@
+import time
+from loguru import logger
+from fastapi.routing import APIRouter
+from core.pipeline import MaskLanguageModelPipe
+from apis.models.base import TrainingStatusResponse
+from apis.models.masklang import (
+    MaskLanguageModelRequestModel,
+    MaskLanguageModelResponseModel,
+)
+from typing import List
+
+router = APIRouter(prefix="/mask")
+# Load trained model. Dummy model being trained on startup...
+logger.info("Training/Loading Mask language model")
+masking_pipeline = MaskLanguageModelPipe()
+logger.info("Model load completed")
+
+# Warm loading / Warm up
+start = time.time()
+logger.info("Model warm loading...")
+masking_pipeline.predict("[MASK] I am good")
+end = time.time()
+logger.info(f"Model warm loading completed in {round(end-start,2)} secs")
+
+
+@router.post(
+    "/predictMask",
+    tags=["langmask"],
+    response_model=List[MaskLanguageModelResponseModel],
+)
+async def mask_lang_prediction(payload: MaskLanguageModelRequestModel):
+    text = payload.text
+    logger.info(f"Recieved payload as {text}")
+    logger.info("Processing predictions...")
+    predictions = masking_pipeline.predict(text)
+    for pred in predictions:
+        pred["orginal_sequence"] = text
+    logger.info("Processed successfully")
+    return predictions
diff --git a/core/pipeline.py b/core/pipeline.py
new file mode 100644
index 0000000..cb2fbea
--- /dev/null
+++ b/core/pipeline.py
@@ -0,0 +1,17 @@
+from transformers import pipeline
+
+
+class MaskLanguageModelPipe:
+    """Fill-mask inference backed by a Hugging Face ``pipeline``."""
+
+    def __init__(self):
+        self.task = "fill-mask"
+        self.model_name = "distilbert-base-uncased"
+        # Build the pipeline once: constructing it loads the tokenizer and
+        # model weights, which is far too slow to repeat on every request
+        # and would throw away the start-up warm-up call.
+        self.model = pipeline(self.task, model=self.model_name)
+
+    def predict(self, text):
+        """Return the fill-mask predictions the pipeline produces for text."""
+        return self.model(text)
diff --git a/main.py b/main.py
index 882ff42..82f7a54 100644
--- a/main.py
+++ b/main.py
@@ -3,6 +3,7 @@
 from fastapi.responses import RedirectResponse
 from apis.v1.iris import router as iris_ns
 from apis.v1.boston import router as boston_ns
+from apis.v1.masklang import router as masklang_ns
 
 # Initialize logging
 logger.add("./logs/katana.log", rotation="500 MB")
@@ -17,6 +18,8 @@
 app.include_router(iris_ns)
 logger.info("Adding Boston namespace route")
 app.include_router(boston_ns)
+logger.info("Adding Mask Lang namespace route")
+app.include_router(masklang_ns)
 
 
 @app.get("/", include_in_schema=False)
diff --git a/requirements.txt b/requirements.txt
index 543a7d3..bd8eed1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,5 @@ flake8==3.8.4
 loguru==0.5.3
 scikit-learn==0.24.1
 uvicorn==0.13.4
+transformers==4.21.0
+torch==1.12.0
\ No newline at end of file