Bonjour à tous ! Dans cet article, je présente mon implémentation d'un bot Telegram capable de convertir des articles trouvés sur Internet en fichiers mp3. Pour cela, j'utiliserai Python 3.6 et quelques bibliothèques. Alors, commençons.
pip install pyttsx3
pip install langdetect
pip install pydub
pip install bs4
pip install telebot
pip install PyTelegramBotAPI
Structure du projet :
bot.py
parser.py
voice.py
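Le fichier bot.py contient la logique du bot. Le fichier parser.py vérifie le lien reçu, extrait le texte de l'article et détermine sa langue, et voice.py se charge de la synthèse vocale. En résumé : l'utilisateur envoie un lien (une url valide, menant à une page qui contient une balise <article>, dans une langue prise en charge), le texte est extrait avec bs4 et lu par pyttsx3, puis le fichier mp3 obtenu est envoyé à l'utilisateur. Passons au code...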
bot.py
Commençons par le message_handler qui répond à la commande /start :
import telebot
from voice import get_mp3_file, get_file_name
from parser import get_article_text, get_article_language, get_link
# Telegram bot client shared by the handlers below; 'TOKEN' is a placeholder
# that must be replaced with the real bot token.
bot = telebot.TeleBot('TOKEN')
@bot.message_handler(commands=['start'])
def forward_message(message):
    """Answer the /start command with the bot's greeting message.

    The reply goes to the user who sent the command.
    """
    # NOTE: the text handler later in bot.py reuses this function name; both
    # are registered through their decorators, not looked up by name.
    greeting = (", ,"
                " , "
                " mp3 .")
    bot.send_message(message.from_user.id, greeting)
# True while an article is being converted, so that a new request is refused
# instead of starting a second synthesis at the same time.
is_running = False


@bot.message_handler(content_types=['text'])
def forward_message(message):
    """Turn the article behind a received link into an mp3 and send it back.

    The message text is validated with get_link(); the article text and its
    language come from get_article_text() / get_article_language(); the audio
    file is produced by get_mp3_file() under the name from get_file_name().
    Each failure case is reported to the user with its own message.
    """
    global is_running
    chat_id = message.from_user.id

    if is_running:
        # A conversion is already in progress: ask the user to wait.
        bot.send_message(chat_id, " , "
                                  " ...")
        return

    link = get_link(message.text)
    if not link:
        # The message is not a url.
        bot.send_message(chat_id, ", , "
                                  " .")
        return

    is_running = True
    try:
        article_text = get_article_text(link)
        article_language = get_article_language(article_text)
        if article_language:
            bot.send_message(chat_id, ", .")
            bot.send_message(chat_id, f" - {article_language[0]}.")
            bot.send_message(chat_id, " ...")
            file_name = get_file_name(link)
            get_mp3_file(file_name, article_text, article_language[1])
            # Close the file as soon as the upload is done.
            with open(file_name, 'rb') as audio_file:
                bot.send_audio(chat_id, audio=audio_file)
        else:
            # No readable article, or its language is not supported.
            bot.send_message(chat_id, " ,"
                                      " ...")
    finally:
        # Always release the flag; otherwise one failed conversion would make
        # the bot refuse every later request.
        is_running = False
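La variable globale is_running indique qu'une conversion est déjà en cours : tant qu'elle vaut True, le bot demande de patienter au lieu de lancer une nouvelle synthèse. La variable link contient l'url extraite du message (ou False si le message n'est pas une url), et article_language la langue détectée de l'article (ou False si elle n'a pas pu être déterminée ou n'est pas prise en charge).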
if link:  # the message is a valid url, so mark the bot as busy
    is_running = True
    article_text = get_article_text(link)
    article_language = get_article_language(article_text)
parser.py
import requests
from bs4 import BeautifulSoup
from langdetect import detect
import re
def get_link(message_text):
    """Return the url when the whole message is one http(s) link, else False.

    The pattern is anchored at both ends, so a link surrounded by other text
    is rejected.
    """
    url_pattern = r'^https?:\/\/?[\w-]{1,32}\.[\w-]{1,32}[^\s@]*$'
    found = re.search(url_pattern, message_text)
    return found.group(0) if found else False
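La fonction get_link vérifie, à l'aide d'une expression régulière, que le message reçu est bien une url. Si c'est le cas, elle renvoie le lien ; sinon elle renvoie False, et le bot répond à l'utilisateur par un message d'erreur.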
def get_article_text(link, timeout=10):
    """Download the page at *link* and return the text of its <article> tag.

    Args:
        link: url of the page to fetch.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the caller indefinitely.

    Returns:
        The text of the first <article> element, with its text fragments
        joined by '. ', or False when the page cannot be fetched or contains
        no <article> element.
    """
    try:
        response = requests.get(link, timeout=timeout)
    except requests.exceptions.RequestException:
        # Base class of every requests failure: connection errors, timeouts,
        # invalid urls, too many redirects, ...
        return False

    soup = BeautifulSoup(response.content, 'html.parser')
    article = soup.select_one('article')
    if article is None:
        return False
    return article.get_text(separator='. ')
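La fonction get_article_text télécharge la page située à l'url. Si la requête échoue avec une exception requests.exceptions.ConnectionError, elle renvoie False, valeur qui est ensuite transmise à get_article_language.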
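Avec BeautifulSoup4, on recherche la balise <article> dans la page et on en extrait le texte. Si la page ne contient pas cette balise, get_article_text renvoie False.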
def get_article_language(article_text):
    """Detect the article language and map it to a voice language.

    Args:
        article_text: the article text, or False when no text was extracted.

    Returns:
        A two-item list ``[label, [voice_language]]`` for a supported
        language: ``['EN', ['en_GB']]`` or ``['RU', ['ru_RU']]``. False is
        returned when there is no usable text, detection fails, or the
        language is not supported.
    """
    # Nothing to detect: a non-string (e.g. False) or blank text.
    if not isinstance(article_text, str) or not article_text.strip():
        return False

    # Imported locally so the block does not depend on a new top-of-file
    # import.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        language = detect(article_text)
    except (TypeError, LangDetectException):
        # langdetect raises LangDetectException when the text has no features
        # it can use (for example only digits or punctuation).
        return False

    supported = {'en': ('EN', 'en_GB'), 'ru': ('RU', 'ru_RU')}
    if language not in supported:
        return False
    label, voice_language = supported[language]
    return [label, [voice_language]]
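La langue de l'article est déterminée avec langdetect. Deux langues sont prises en charge : l'anglais et le russe. Si la langue est reconnue, la fonction renvoie une liste dont l'élément 0 est le code de la langue affiché à l'utilisateur et l'élément 1 la langue de la voix pour pyttsx3 ; sinon elle renvoie False, et le bot envoie un message d'erreur à l'utilisateur.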
voice.py
import pyttsx3
from pydub import AudioSegment
import re
def engine_settings(engine, article_language):
    """Set the speech rate and pick a voice for the article language.

    A voice whose ``languages`` equals *article_language* and whose gender is
    'VoiceGenderMale' is preferred. If the language has no such voice, the
    first voice matching the language is used instead, so the text is not
    read with a voice of another language. When no voice matches at all, the
    engine keeps its current voice.

    Args:
        engine: speech engine exposing getProperty() / setProperty().
        article_language: list of language codes, compared for equality with
            each voice's ``languages`` attribute (e.g. ['en_GB']).
    """
    voices = engine.getProperty('voices')
    engine.setProperty('rate', 185)  # speech rate

    fallback_voice = None
    for voice in voices:
        if voice.languages != article_language:
            continue
        if voice.gender == 'VoiceGenderMale':
            engine.setProperty('voice', voice.id)
            return
        if fallback_voice is None:
            fallback_voice = voice

    if fallback_voice is not None:
        engine.setProperty('voice', fallback_voice.id)
def get_mp3_file(file_name, article_text, article_language):
    """Speak *article_text* into *file_name* and convert that file to mp3.

    Args:
        file_name: path of the audio file to create.
        article_text: text to synthesize.
        article_language: language codes passed on to engine_settings().
    """
    tts = pyttsx3.init()
    engine_settings(tts, article_language)     # rate and voice
    tts.save_to_file(article_text, file_name)  # queue the synthesis
    tts.runAndWait()                           # run the queued commands
    convert_file_to_mp3(file_name)             # re-encode the result as mp3
def convert_file_to_mp3(file_name):
    """Re-encode the audio file at *file_name* as mp3, in place.

    The file is loaded with AudioSegment.from_file() and then exported back
    to the same path in mp3 format.
    """
    audio = AudioSegment.from_file(file_name)
    # export() returns the open output file; close it explicitly instead of
    # relying on garbage collection, since the file is reopened for upload.
    exported = audio.export(file_name, format="mp3")
    exported.close()
def get_file_name(link, max_length=200):
    """Build an mp3 file name from an article url.

    The leading http(s) scheme is dropped when present, and every character
    that is not a letter, digit or underscore is replaced by '_', so that
    query-string characters such as '?', '&' or ':' cannot end up in the file
    name.

    Args:
        link: url of the article; a link without a scheme is accepted too.
        max_length: maximum number of characters kept before the '.mp3'
            extension, so that very long urls still give a usable file name.

    Returns:
        The sanitized name followed by '.mp3'.
    """
    stem = re.sub(r'^https?:\/\/?', '', link, count=1)
    stem = re.sub(r'\W', '_', stem)
    return stem[:max_length] + '.mp3'
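La synthèse vocale est réalisée avec pyttsx3, et la conversion en mp3 avec pydub. La fonction engine_settings règle la vitesse de lecture et choisit, parmi les voix disponibles, une voix (masculine) correspondant à la langue de l'article.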
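La fonction get_mp3_file appelle engine_settings, puis enregistre la lecture du texte dans un fichier (le nom produit par get_file_name se termine par .mp3 ; le fichier écrit par pyttsx3 est ensuite converti en mp3 avec AudioSegment). Une fois engine.runAndWait terminé, le fichier est converti en mp3 et peut être envoyé à l'utilisateur.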
C'est tout. Lancement du bot :
python bot.py
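Limites : le bot ne traite que les pages qui contiennent une balise <article>, et la voix utilisée dépend de celles que pyttsx3 trouve sur le système. Merci de votre attention.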