From f0a564b9eee7a16eca964ea97e173e379ed648ed Mon Sep 17 00:00:00 2001 From: shaz13 Date: Wed, 3 Aug 2022 19:52:18 +0530 Subject: [PATCH 1/3] Adding lang mask model --- Dockerfile | 2 +- apis/models/masklang.py | 14 ++++++++++++++ apis/v1/boston.py | 8 ++------ apis/v1/iris.py | 4 +--- apis/v1/masklang.py | 36 ++++++++++++++++++++++++++++++++++++ core/pipeline.py | 11 +++++++++++ main.py | 3 +++ requirements.txt | 2 ++ 8 files changed, 70 insertions(+), 10 deletions(-) create mode 100644 apis/models/masklang.py create mode 100644 apis/v1/masklang.py create mode 100644 core/pipeline.py diff --git a/Dockerfile b/Dockerfile index 18e42f4..2965c0b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3 +FROM python:3.8 WORKDIR /usr/src/app diff --git a/apis/models/masklang.py b/apis/models/masklang.py new file mode 100644 index 0000000..b8e9055 --- /dev/null +++ b/apis/models/masklang.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel, Field + + +class MaskLanguageModelRequestModel(BaseModel): + predictionId: str = "f75ef3b8-f414-422c-87b1-1e21e684661c" + text: str = "Am I a [MASK] person if I save my green planet?" + + +class MaskLanguageModelResponseModel(BaseModel): + score: float = 0.1 + token: int = 100 + token_str: str = "token string" + orginal_sequence: str = "Sequence with [MASK]" + sequence: str = "Sequence with [MASK] replaced with token word" diff --git a/apis/v1/boston.py b/apis/v1/boston.py index 967f624..b93483a 100644 --- a/apis/v1/boston.py +++ b/apis/v1/boston.py @@ -13,9 +13,7 @@ logger.info("Training completed") -@router.post( - "/trainModel", tags=["boston"], response_model=TrainingStatusResponse -) +@router.post("/trainModel", tags=["boston"], response_model=TrainingStatusResponse) async def boston_train(): training_id = uuid.uuid1() # Queue training / start training via RabbitMQ, Queue, etc.. 
@@ -27,9 +25,7 @@ async def boston_train(): } -@router.post( - "/predictPrice", tags=["boston"], response_model=BostonHouseResponseModel -) +@router.post("/predictPrice", tags=["boston"], response_model=BostonHouseResponseModel) async def boston_price_prediction(body: BostonHouseRequestModel): request = body.dict() payload = [x for x in request.values()] diff --git a/apis/v1/iris.py b/apis/v1/iris.py index d1831f0..7b69217 100644 --- a/apis/v1/iris.py +++ b/apis/v1/iris.py @@ -16,9 +16,7 @@ logger.info("Training completed") -@router.post( - "/trainModel", tags=["iris"], response_model=TrainingStatusResponse -) +@router.post("/trainModel", tags=["iris"], response_model=TrainingStatusResponse) async def iris_train(): training_id = uuid.uuid1() # Queue training / start training via RabbitMQ, Queue, etc.. diff --git a/apis/v1/masklang.py b/apis/v1/masklang.py new file mode 100644 index 0000000..e4d50da --- /dev/null +++ b/apis/v1/masklang.py @@ -0,0 +1,36 @@ +import time +from loguru import logger +from fastapi.routing import APIRouter +from core.pipeline import MaskLanguageModelPipe +from apis.models.base import TrainingStatusResponse +from apis.models.masklang import ( + MaskLanguageModelRequestModel, + MaskLanguageModelResponseModel, +) +from typing import List + +router = APIRouter(prefix="/mask") +# Load trained model. Dummy model being trained on startup... 
+logger.info("Training/Loading Mask language model")
+model = MaskLanguageModelPipe()
+logger.info("Model load completed")
+
+# Warm loading / Warm up
+start = time.time()
+logger.info("Model warm loading ...")
+model.predict("[MASK] I am good")
+end = time.time()
+logger.info(f"Model warm loading completed in {round(end-start,2)} secs")
+
+
+@router.post(
+    "/predictMask",
+    tags=["langmask"],
+    response_model=List[MaskLanguageModelResponseModel],
+)
+async def mask_lang_prediction(payload: MaskLanguageModelRequestModel):
+    text = payload.text
+    predictions = model.predict(text)
+    for pred in predictions:
+        pred["orginal_sequence"] = text
+    return predictions
diff --git a/core/pipeline.py b/core/pipeline.py
new file mode 100644
index 0000000..cb2fbea
--- /dev/null
+++ b/core/pipeline.py
@@ -0,0 +1,17 @@
+from transformers import pipeline
+
+
+class MaskLanguageModelPipe:
+    """Fill-mask predictor that wraps a transformers pipeline."""
+
+    def __init__(self):
+        """Build the fill-mask pipeline once so every call reuses it."""
+        self.task = "fill-mask"
+        self.model_name = "distilbert-base-uncased"
+        # Constructing a pipeline loads the model, so do it a single time
+        # here rather than on every predict() call.
+        self.model = pipeline(self.task, model=self.model_name)
+
+    def predict(self, text):
+        """Run the cached pipeline on ``text`` and return its predictions."""
+        return self.model(text)
diff --git a/main.py b/main.py
index 882ff42..82f7a54 100644
--- a/main.py
+++ b/main.py
@@ -3,6 +3,7 @@
 from fastapi.responses import RedirectResponse
 from apis.v1.iris import router as iris_ns
 from apis.v1.boston import router as boston_ns
+from apis.v1.masklang import router as masklang_ns
 
 # Initialize logging
 logger.add("./logs/katana.log", rotation="500 MB")
@@ -17,6 +18,8 @@
 app.include_router(iris_ns)
 logger.info("Adding Boston namespace route")
 app.include_router(boston_ns)
+logger.info("Adding Mask Lang namespace route")
+app.include_router(masklang_ns)
 
 
 @app.get("/", include_in_schema=False)
diff --git a/requirements.txt b/requirements.txt
index 543a7d3..bd8eed1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,5 @@ flake8==3.8.4
 loguru==0.5.3
 scikit-learn==0.24.1
 uvicorn==0.13.4
+transformers==4.21.0
+torch==1.12.0
\ No newline at end of file
From 
78b1f3f12ce97ba94f7e8f17ea7db70b424c0488 Mon Sep 17 00:00:00 2001 From: shaz13 Date: Wed, 3 Aug 2022 22:55:28 +0530 Subject: [PATCH 2/3] init pipiline --- apis/v1/boston.py | 2 +- apis/v1/masklang.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/apis/v1/boston.py b/apis/v1/boston.py index b93483a..fc3a100 100644 --- a/apis/v1/boston.py +++ b/apis/v1/boston.py @@ -7,7 +7,7 @@ router = APIRouter(prefix="/boston") # Load trained model. Dummy model being trained on startup... -logger.info("Training/Loading iris classification model") +logger.info("Training/Loading boston linear reg model") trainer = BostonHousePriceTrainerInstance() boston_model = trainer.train() logger.info("Training completed") diff --git a/apis/v1/masklang.py b/apis/v1/masklang.py index e4d50da..106db10 100644 --- a/apis/v1/masklang.py +++ b/apis/v1/masklang.py @@ -5,20 +5,20 @@ from apis.models.base import TrainingStatusResponse from apis.models.masklang import ( MaskLanguageModelRequestModel, - MaskLanguageModelResponseModel, + MaskLanguageModelResponseModel ) from typing import List router = APIRouter(prefix="/mask") # Load trained model. Dummy model being trained on startup... 
logger.info("Training/Loading Mask language model") -model = MaskLanguageModelPipe() +masking_pipeline = MaskLanguageModelPipe() logger.info("Model load completed") # Warm loading / Warm up start = time.time() -logger.info("Model warm loading ...") -model.predict("[MASK] I am good") +logger.info("Model warm loading...") +masking_pipeline.predict("[MASK] I am good") end = time.time() logger.info(f"Model warm loading completed in {round(end-start,2)} secs") @@ -30,7 +30,10 @@ ) async def mask_lang_prediction(payload: MaskLanguageModelRequestModel): text = payload.text - predictions = model.predict(text) + logger.info(f"Recieved payload as {text}") + logger.info(f"Processing predictions...") + predictions = masking_pipeline.predict(text) for pred in predictions: pred["orginal_sequence"] = text + logger.info(f"Processed successfully") return predictions From b5a386dc57adb5c6cd3fe3f7d23c16724d0f01b6 Mon Sep 17 00:00:00 2001 From: shaz13 Date: Thu, 4 Aug 2022 20:41:44 +0530 Subject: [PATCH 3/3] AV DataHour commit --- apis/models/house.py | 6 ++---- apis/v1/masklang.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/apis/models/house.py b/apis/models/house.py index e4661e8..17eda2c 100644 --- a/apis/models/house.py +++ b/apis/models/house.py @@ -18,8 +18,7 @@ class BostonHouseRequestModel(BaseModel): 9. RAD index of accessibility to radial highways 10. TAX full-value property-tax rate per $10,000 11. PTRATIO pupil-teacher ratio by town - 12. B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks - by town + 12. B 1000(Bk - 0.63)^2 Biased variable 13. 
LSTAT % lower status of the population """ @@ -70,8 +69,7 @@ class BostonHouseRequestModel(BaseModel): ) discriminateProportion: float = Field( example=396.30, - description="1000(Bk - 0.63)^2 where Bk is the proportion \ - of colored by town", + description="Discriminate proportion", ) percentLowerStatPopulation: float = Field( example=4.30, diff --git a/apis/v1/masklang.py b/apis/v1/masklang.py index 106db10..5bfbd01 100644 --- a/apis/v1/masklang.py +++ b/apis/v1/masklang.py @@ -5,7 +5,7 @@ from apis.models.base import TrainingStatusResponse from apis.models.masklang import ( MaskLanguageModelRequestModel, - MaskLanguageModelResponseModel + MaskLanguageModelResponseModel, ) from typing import List