{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# IPA fill-in-the-blank pronunciation quiz\n",
    "\n",
    "This notebook downloads a JSON-lines dictionary, picks a random word, and prints its US pronunciation twice: once with the vowel symbols blanked out and once with the consonant symbols blanked out. The word's audio is then played with VLC and embedded as an inline audio widget."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install python-vlc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "import re\n",
    "\n",
    "import requests\n",
    "import vlc\n",
    "from IPython.display import Audio\n",
    "\n",
    "\n",
    "def load_json_database(source, timeout=30):\n",
    "    \"\"\"Load a JSON-lines database from a URL or a local file.\n",
    "\n",
    "    Each non-blank line of the source is parsed as one JSON document. Lines\n",
    "    that are not valid JSON are reported with ``print`` and skipped.\n",
    "\n",
    "    Args:\n",
    "        source: An ``http://`` / ``https://`` URL, or the path of a local file.\n",
    "        timeout: Seconds to wait for the server; only used for URL sources.\n",
    "\n",
    "    Returns:\n",
    "        A list with one parsed record per valid line. Fetch and open errors\n",
    "        are printed rather than raised, so the list may be empty or partial.\n",
    "    \"\"\"\n",
    "    records = []\n",
    "\n",
    "    def parse_json_lines(lines):\n",
    "        for line in lines:\n",
    "            # Lines read from a file keep their trailing newline, so strip\n",
    "            # before testing for emptiness; otherwise every blank line would\n",
    "            # be reported as a JSON error.\n",
    "            line = line.strip()\n",
    "            if not line:\n",
    "                continue\n",
    "            try:\n",
    "                records.append(json.loads(line))\n",
    "            except json.JSONDecodeError as e:\n",
    "                print(f'Error parsing JSON: {e}')\n",
    "\n",
    "    try:\n",
    "        if source.startswith(('http://', 'https://')):\n",
    "            # Without a timeout a stalled server would hang the notebook.\n",
    "            response = requests.get(source, timeout=timeout)\n",
    "            response.raise_for_status()  # Raise an error for bad status codes\n",
    "            parse_json_lines(response.iter_lines(decode_unicode=True))\n",
    "        else:\n",
    "            with open(source, 'r', encoding='utf-8') as file:\n",
    "                parse_json_lines(file)\n",
    "    except requests.exceptions.RequestException as e:\n",
    "        print(f'Error fetching data from URL: {e}')\n",
    "    except FileNotFoundError as e:\n",
    "        print(f'Error opening file: {e}')\n",
    "    except Exception as e:\n",
    "        # Deliberately best-effort: report the problem and return what was parsed so far.\n",
    "        print(f'An unexpected error occurred: {e}')\n",
    "\n",
    "    return records\n",
    "\n",
    "\n",
    "url = 'https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json'\n",
    "json_database = load_json_database(url)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Entries whose word ends with one of these suffixes are skipped by the quiz.\n",
    "EXCLUDED_SUFFIXES = ('ed', 'ing', 'es', 'ts', 'ks', 'ds', 'ps', 'bs', 'gs', 'ls', 'rs', 'ms', 'ns', 'er', 'est')\n",
    "\n",
    "\n",
    "def search_in_json_database(database, search_word, region):\n",
    "    \"\"\"Look up the pronunciation of a word for one region.\n",
    "\n",
    "    Args:\n",
    "        database: Iterable of record dicts as returned by ``load_json_database``.\n",
    "        search_word: Value to compare against each record's ``'word'`` field.\n",
    "        region: Value to compare against each pronunciation's ``'region'`` field.\n",
    "\n",
    "    Returns:\n",
    "        A dict with the keys ``'pronunciation'`` and ``'audio'`` taken from the\n",
    "        first pronunciation whose region matches, or the string ``'not exist'``\n",
    "        when no record has both the word and a pronunciation for that region.\n",
    "    \"\"\"\n",
    "    for record in database:\n",
    "        # Only records whose word field matches are inspected.\n",
    "        if record.get('word') != search_word:\n",
    "            continue\n",
    "        # ``or []`` also covers keys that are present but null.\n",
    "        for pos_item in record.get('pos_items') or []:\n",
    "            for pronunciation in pos_item.get('pronunciations') or []:\n",
    "                if pronunciation.get('region') == region:\n",
    "                    # First pronunciation for the requested region wins.\n",
    "                    return {\n",
    "                        'pronunciation': pronunciation.get('pronunciation'),\n",
    "                        'audio': pronunciation.get('audio')\n",
    "                    }\n",
    "    # No matching word with a pronunciation for the region was found.\n",
    "    return 'not exist'\n",
    "\n",
    "\n",
    "def replace_with_underscores(match):\n",
    "    \"\"\"Return one underscore per character of the matched text (``re.sub`` callback).\"\"\"\n",
    "    return '_' * len(match.group(0))\n",
    "\n",
    "\n",
    "def build_symbol_pattern(symbols):\n",
    "    \"\"\"Compile a regex that matches any of the given phonetic symbols.\n",
    "\n",
    "    Empty and duplicate symbols are dropped. Alternatives are ordered longest\n",
    "    first because regex alternation takes the first alternative that matches:\n",
    "    listing 't' before 't̬' would leave the combining diacritic behind, and\n",
    "    listing 'ɑː' before 'ɑːr' would prevent 'ɑːr' from ever matching as a unit.\n",
    "    \"\"\"\n",
    "    unique_symbols = {symbol for symbol in symbols if symbol}\n",
    "    ordered_symbols = sorted(unique_symbols, key=lambda symbol: (-len(symbol), symbol))\n",
    "    return re.compile('|'.join(re.escape(symbol) for symbol in ordered_symbols))\n",
    "\n",
    "\n",
    "def pick_quiz_word(database, region, excluded_suffixes=EXCLUDED_SUFFIXES, max_attempts=1000):\n",
    "    \"\"\"Pick a random entry that can actually be used for the quiz.\n",
    "\n",
    "    An entry qualifies when its word does not end with an excluded suffix and\n",
    "    ``search_in_json_database`` finds both a pronunciation and an audio URL\n",
    "    for ``region``.\n",
    "\n",
    "    Returns:\n",
    "        A ``(entry, pronunciation_info)`` tuple.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``database`` is empty.\n",
    "        LookupError: If no qualifying entry is drawn within ``max_attempts`` tries.\n",
    "    \"\"\"\n",
    "    if not database:\n",
    "        raise ValueError('The dictionary database is empty; nothing to quiz on.')\n",
    "    for _ in range(max_attempts):\n",
    "        entry = random.choice(database)\n",
    "        word = entry.get('word')\n",
    "        if not isinstance(word, str) or word.endswith(tuple(excluded_suffixes)):\n",
    "            continue\n",
    "        info = search_in_json_database(database, word, region)\n",
    "        # The search returns the string 'not exist' on a miss, so check the type\n",
    "        # before indexing, and require both fields the quiz needs.\n",
    "        if isinstance(info, dict) and info.get('pronunciation') and info.get('audio'):\n",
    "            return entry, info\n",
    "    raise LookupError(f'No suitable word with a {region!r} pronunciation found after {max_attempts} attempts.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "broadband\n",
      "https://dictionary.cambridge.org/media/english/us_pron/c/cus/cus00/cus00276.mp3\n",
      "Fill vowels in blanks: ˈbr__d.b_nd\n",
      "Fill in consonants in blanks: ˈ__ɑː_._æ__\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       " \n",
       " "
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Phonetic symbols that get blanked out in the vowel and in the consonant exercise.\n",
    "VOWEL_SYMBOLS = ['ɑː', 'ɑːr', 'ʌ', 'iː', 'ɪ', 'i', 'ɪr', 'ʊ', 'ʊr', 'uː', 'e', 'er', 'æ', 'ə', 'ɚ', 'ɝː', 'ɒ', 'ɔː', 'ɔːr', 'ɔɪ', 'aɪ', 'aɪr', 'eɪ', 'aʊ', 'aʊr', 'oʊ']\n",
    "CONSONANT_SYMBOLS = ['p', 'b', 't', 'd', 'k', 'ɡ', 'f', 'v', 'θ', 'ð', 's', 'z', 'ʃ', 'ʒ', 'tʃ', 'dʒ', 'r', 'h', 'l', 't̬', 'j', 'w', 'ŋ', 'n', 'm', 'tr', 'dr', 'ts', 'dz', 'br', 'pr', 'fr', 'ɡr', 'θr', 'ʃr', 'kr', 'bl', 'kl', 'ɡl', 'fl', 'pl', 'sl', 'sp', 'st', 'sk', 'sm', 'sn', 'sw', 'str', 'spr', 'skr', 'spl', 'sfr', 'skw', 'skl']\n",
    "\n",
    "vowel_phonetics = build_symbol_pattern(VOWEL_SYMBOLS)\n",
    "consonant_phonetics = build_symbol_pattern(CONSONANT_SYMBOLS)\n",
    "\n",
    "# Pick a random word that has no excluded ending and has a 'us' pronunciation with audio.\n",
    "random_word, random_word_us = pick_quiz_word(json_database, 'us')\n",
    "\n",
    "# Show the word itself.\n",
    "random_word_entry = random_word['word']\n",
    "print(random_word_entry)\n",
    "\n",
    "random_word_phonetics = random_word_us['pronunciation']\n",
    "\n",
    "# Show the audio URL of the word.\n",
    "random_word_us_audio_url = random_word_us['audio']\n",
    "print(random_word_us_audio_url)\n",
    "\n",
    "blank_vowel_phonetics = vowel_phonetics.sub(replace_with_underscores, random_word_phonetics)\n",
    "blank_consonant_phonetics = consonant_phonetics.sub(replace_with_underscores, random_word_phonetics)\n",
    "\n",
    "# Exercise 1: fill in the vowels.\n",
    "print(f'Fill vowels in blanks: {blank_vowel_phonetics}')\n",
    "\n",
    "# Exercise 2: fill in the consonants.\n",
    "print(f'Fill in consonants in blanks: {blank_consonant_phonetics}')\n",
    "\n",
    "# Play the audio through VLC.\n",
    "player = vlc.MediaPlayer(random_word_us_audio_url)\n",
    "player.play()\n",
    "\n",
    "# Also embed an inline audio widget as the cell's output.\n",
    "Audio(url=random_word_us_audio_url)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}