diff --git a/models/ResNet50/README.md b/models/ResNet50/README.md new file mode 100644 index 0000000..885fcae --- /dev/null +++ b/models/ResNet50/README.md @@ -0,0 +1,44 @@ +# Infant Cry Classification ResNet50 Model + +# Overview +This repository contains a ResNet50 model for classifying infant cry sounds. The model achieved an accuracy of 84.273% on the test dataset, showcasing its ability to capture intricate features of infant cry patterns. + +# Model Architecture +The ResNet50 architecture is designed to facilitate training of very deep networks. It includes residual blocks that enable the training of deeper networks without the vanishing gradient problem. + + +Model: "resnet50" +__________________________________________________________________________________________ +Layer (type) Output Shape Param # Connected to +========================================================================================== +input_1 (InputLayer) [(None, 224, 224, 3) 0 +__________________________________________________________________________________________ +conv1_pad (ZeroPadding2D) (None, 230, 230, 3) 0 input_1[0][0] +__________________________________________________________________________________________ +conv1_conv (Conv2D) (None, 112, 112, 64 9472 conv1_pad[0][0] +... +__________________________________________________________________________________________ +dense_3 (Dense) (None, 1) 2049 global_average_pooling2d_1[0][0] +========================================================================================== +Total params: 23,587,713 +Trainable params: 23,534,593 +Non-trainable params: 53,120 + +# Dataset +The model was trained on a diverse dataset containing recordings of infant cry sounds. The dataset includes various cry patterns and non-cry sounds to ensure robust classification. + +# Training +The ResNet50 model was trained using TensorFlow and Keras with an Adam optimizer. 
The training process involved data augmentation techniques to enhance model generalization. The training accuracy reached 90%, while the validation accuracy reached 88%. + +# Evaluation +The model achieved an accuracy of 84.273% on the test dataset even though the dataset is a bit imbalanced, highlighting its ability to accurately classify infant cry sounds. The model's precision, recall, and F1-score metrics are commendable. + +# Usage +To use the trained ResNet50 model for inference, you can load the model weights using the provided script: + + +python load_resnet50_model.py --weights path/to/resnet50_weights.h5 --audio path/to/test_audio.wav +Replace path/to/resnet50_weights.h5 with the path to the saved model weights and path/to/test_audio.wav with the path to the audio file you want to classify. + +# Acknowledgments +I would like to express my gratitude to the maintainers and data providers who made this project possible. diff --git a/models/ResNet50/cry-analyzer-using-resnet50.ipynb b/models/ResNet50/cry-analyzer-using-resnet50.ipynb new file mode 100644 index 0000000..0239040 --- /dev/null +++ b/models/ResNet50/cry-analyzer-using-resnet50.ipynb @@ -0,0 +1 @@ +{"metadata":{"colab":{"provenance":[],"gpuType":"T4","collapsed_sections":["99FRzLHUzvjR","Yd64T_vhz0qK","ZbKrHG-CqOsv"]},"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"accelerator":"GPU","kaggle":{"accelerator":"gpu","dataSources":[{"sourceId":7288843,"sourceType":"datasetVersion","datasetId":4227039}],"dockerImageVersionId":30627,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# ⬇️ Import Libraries","metadata":{}},{"cell_type":"code","source":"import 
numpy as np\nimport pandas as pd\nimport os\nimport librosa\nimport librosa.display\nimport matplotlib.pyplot as plt\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import normalize\nimport warnings\nwarnings.filterwarnings('ignore')\nfrom sklearn.model_selection import train_test_split\nimport tensorflow\nimport numpy as np\nfrom sklearn.utils.class_weight import compute_class_weight\nfrom tensorflow.keras.models import Sequential\nfrom tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Resizing, Normalization\nfrom tensorflow.keras.optimizers import AdamW\nfrom tensorflow.keras.applications import VGG16\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\nfrom tensorflow.keras.optimizers.schedules import ExponentialDecay\nfrom sklearn.model_selection import train_test_split\nfrom tensorflow.keras.utils import to_categorical\n\nfrom tensorflow.keras.layers import LSTM, Dense","metadata":{"id":"Pti8RhxqMjTe","execution":{"iopub.status.busy":"2023-12-27T10:03:41.985875Z","iopub.execute_input":"2023-12-27T10:03:41.986171Z","iopub.status.idle":"2023-12-27T10:03:55.051485Z","shell.execute_reply.started":"2023-12-27T10:03:41.986145Z","shell.execute_reply":"2023-12-27T10:03:55.050562Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","output_type":"stream"}]},{"cell_type":"markdown","source":"