notebook-examples/whisper_test_multimodel.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b56703b7",
   "metadata": {},
   "source": [
    "*this notebook requires a working PyTorch GPU environment* "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3967f4b4",
   "metadata": {},
   "source": [
    "#  OpenAI's Whisper multimodel\n",
    "\n",
    "Speech to text...\n",
    "\n",
    "more information at\n",
    "- https://openai.com/blog/whisper\n",
    "- https://github.com/openai/whisper\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dac0ca1b-b098-426a-982a-777049f40581",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install git+https://github.com/openai/whisper.git "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d10ec51-1c42-40de-a6ac-f2b70566b9a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import torch\n",
    "import whisper\n",
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ffe9e04-cc7f-4c65-b9a3-10bb1a5eac41",
   "metadata": {},
   "outputs": [],
   "source": [
    "Models = [\n",
    "    'base',\n",
    "    'medium',\n",
    "    ]\n",
    "\n",
    "Files = [\n",
    "    \"./TestdateiAudiotranskription.mp3\"\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca9f2fae-f7f9-4846-afd5-7c27253e4061",
   "metadata": {},
   "outputs": [],
   "source": [
    "LogFile = \"./log.md\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f7fbfb3-a116-4dcb-a154-188a26291f47",
   "metadata": {},
   "outputs": [],
   "source": [
    "def transcribe(model, file):\n",
    "    s_transcribe = time.time()\n",
    "    result = model.transcribe(file, verbose=True)\n",
    "    e_transcribe = time.time()\n",
    "    writeLog(f'- **Transcribe ({file}) : {e_transcribe-s_transcribe}**\\n')\n",
    "    writeLog(f' - ({file}) : {result[\"text\"]}\\n')\n",
    "\n",
    "\n",
    "def writeLog(mes):\n",
    "    with open(LogFile, mode=\"a\") as f:\n",
    "        f.write(mes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46042b2a-507b-48c8-b0f2-dd9e8260c52a",
   "metadata": {},
   "outputs": [],
   "source": [
    "writeLog(\"# Whisper Research\\n\")\n",
    "writeLog(f\"## Research Start {datetime.now().strftime('%Y/%m/%d %H:%M:%S')}  \\n\")\n",
    "writeLog(f' ** cuda available: {torch.cuda.is_available()} **\\n')\n",
    "for m in Models:\n",
    "    writeLog(f' ### Model : ({m}) ---\\n')\n",
    "    s_loadmodel = time.time()\n",
    "    model = whisper.load_model(m)\n",
    "    e_loadmodel = time.time()\n",
    "    writeLog(f' ** Load Model ({m}) : {e_loadmodel-s_loadmodel}**\\n')\n",
    "    for file in Files:\n",
    "        transcribe(model, file)\n",
    "    writeLog(' -------- \\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d6bb338-cbe4-4bc1-9ce4-204771f9721b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# clear resources\n",
    "from numba import cuda\n",
    "device = cuda.get_current_device()\n",
    "device.reset()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}