Nous écrivons un bot Telegram qui convertit des articles Internet en fichiers mp3

Bonjour à tous ! Dans cet article, je partage mon implémentation d'un bot Telegram capable de convertir des articles trouvés sur Internet en fichiers mp3. Pour cela, j'utilise Python 3.6 et quelques bibliothèques tierces. Alors, commençons.





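Il nous faut d'abord un bot Telegram et son jeton d'accès (token). Les bibliothèques nécessaires s'installent ensuite avec pip :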





# Text-to-speech engine used by voice.py.
pip install pyttsx3
# Language detection used by parser.py.
pip install langdetect
# Audio conversion used by voice.py; pydub needs ffmpeg (or libav) installed
# on the system to export mp3.
pip install pydub
# HTML parsing (BeautifulSoup) and HTTP download used by parser.py.
pip install bs4
pip install requests
# Provides the `telebot` module imported by bot.py. The separate PyPI package
# named `telebot` is a different project and must not be installed alongside.
pip install PyTelegramBotAPI
      
      



Le projet se compose de trois fichiers :





bot.py





parser.py





voice.py





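Le fichier bot.py contient la logique du bot, parser.py récupère et analyse l'article, et voice.py se charge de la synthèse vocale. Le principe est le suivant : l'utilisateur envoie un message contenant l'URL d'un article ; le texte de la balise <article> est extrait avec bs4 et lu par pyttsx3, puis le résultat est converti en mp3 et renvoyé à l'utilisateur. Passons au code...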





bot.py

Le premier message_handler répond à la commande /start :





import telebot
from voice import get_mp3_file, get_file_name
from parser import get_article_text, get_article_language, get_link


# Bot client shared by the message handlers in this file.
# NOTE(review): 'TOKEN' is presumably a placeholder for the real bot token;
# replace it before running.
bot = telebot.TeleBot('TOKEN')

@bot.message_handler(commands=['start'])
def forward_message(message):
    """Answer the /start command with the bot's greeting text.

    Args:
        message: Incoming message; the reply is sent to
            ``message.from_user.id``.
    """
    greeting = (",     ,"
                "    , "
                "    mp3 .")
    recipient = message.from_user.id
    bot.send_message(recipient, greeting)
      
      







# Busy flag: True while an article is being converted, so that links arriving
# in the meantime are refused instead of starting a second conversion.
is_running = False


@bot.message_handler(content_types=['text'])
def forward_message(message):
    """Convert the article linked in a text message to mp3 and send it back.

    The message must consist of a URL (checked with ``get_link``). The
    article text and language are extracted, the text is synthesized to an
    mp3 file and the file is sent to the user. If the link is invalid, the
    article cannot be read, or another conversion is already running, the
    user receives the corresponding notice instead.

    Args:
        message: Incoming message; ``message.text`` is inspected and replies
            go to ``message.from_user.id``.
    """
    global is_running
    chat_id = message.from_user.id

    if is_running:
        bot.send_message(chat_id, "   , "
                                  " ...")
        return

    link = get_link(message.text)
    if not link:
        bot.send_message(chat_id, ",   ,  "
                                  "   .")
        return

    is_running = True
    try:
        article_text = get_article_text(link)
        article_language = get_article_language(article_text)
        if article_language:
            bot.send_message(chat_id, ",   .")
            bot.send_message(chat_id, f"  - {article_language[0]}.")
            bot.send_message(chat_id, " ...")
            file_name = get_file_name(link)
            get_mp3_file(file_name, article_text, article_language[1])
            # Close the file once it has been uploaded instead of leaking
            # the handle.
            with open(file_name, 'rb') as audio_file:
                bot.send_audio(chat_id, audio=audio_file)
        else:
            bot.send_message(chat_id, "   ,"
                                      "  ...")
    finally:
        # Always release the flag: without this, any exception above would
        # leave the bot refusing every later request.
        is_running = False
      
      



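La variable is_running indique qu'une conversion est déjà en cours : tant qu'elle vaut True, le bot refuse les nouvelles demandes. La variable link contient l'URL extraite du message, et article_language la langue détectée de l'article.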





if link:  # ,    ,   - 
            is_running = True
            article_text = get_article_text(link)
            article_language = get_article_language(article_text)       
      
      



parser.py

import requests
from bs4 import BeautifulSoup
from langdetect import detect
import re


def get_link(message_text):
    """Return the URL contained in ``message_text``, or ``False``.

    The whole message has to be a single http(s) URL; any surrounding text
    makes the check fail.

    Args:
        message_text: Text of the incoming message.

    Returns:
        The matched URL string, or ``False`` when the message is not a URL.
    """
    url_pattern = (r'^https?:\/\/?[\w-]{1,32}'
                   r'\.[\w-]{1,32}[^\s@]*$')
    found = re.search(url_pattern, message_text)
    return found.group(0) if found else False
      
      



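La fonction get_link vérifie que le message est bien une URL. Si c'est le cas, elle renvoie cette URL ; sinon elle renvoie False, et le bot répond par un message d'erreur.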





def get_article_text(link, timeout=10):
    """Download the page at ``link`` and return the text of its <article> tag.

    Args:
        link: URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely, which would
            stall the bot.

    Returns:
        The text of the first ``<article>`` element, with text fragments
        joined by ``'. '``, or ``False`` if the page could not be downloaded
        or contains no ``<article>`` element.
    """
    try:
        response = requests.get(link, timeout=timeout)
    except requests.exceptions.RequestException:
        # Covers ConnectionError as well as timeouts, invalid URLs and
        # redirect loops, all of which mean "no article available".
        return False
    soup = BeautifulSoup(response.content, 'html.parser')
    article = soup.select_one('article')
    if article is None:
        return False
    return article.get_text(separator='. ')
      
      



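La fonction get_article_text télécharge la page située à l'URL. En cas de requests.exceptions.ConnectionError, elle renvoie False, valeur qui est ensuite transmise à get_article_language.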





BeautifulSoup4 recherche ensuite la balise <article> et en extrait le texte. Si la page ne contient pas cette balise, get_article_text renvoie False.





def get_article_language(article_text):
    """Detect the article language and map it to the values the bot needs.

    Args:
        article_text: Article text, or ``False`` when no text could be
            extracted.

    Returns:
        ``[label, voice_languages]`` for a supported language, where
        ``label`` is ``'EN'`` or ``'RU'`` and ``voice_languages`` is
        ``['en_GB']`` or ``['ru_RU']``; ``False`` if the text is not a
        string, the language cannot be detected, or it is not supported.
    """
    # Imported locally so this function does not depend on a change to the
    # module-level imports.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        language = detect(article_text)
    except (TypeError, LangDetectException):
        # TypeError: article_text is not a string (e.g. False).
        # LangDetectException: empty text or text with nothing to detect.
        return False
    # Built on every call so callers always receive fresh lists.
    supported = {
        'en': ['EN', ['en_GB']],
        'ru': ['RU', ['ru_RU']],
    }
    return supported.get(language, False)
      
      



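La langue de l'article est déterminée avec langdetect. Deux langues sont prises en charge : l'anglais et le russe. Si la langue est reconnue, la fonction renvoie une liste dont l'élément 0 est le nom de la langue affiché à l'utilisateur et l'élément 1 la valeur utilisée pour choisir la voix pyttsx3 ; sinon elle renvoie False, et le bot envoie un message d'erreur.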





voice.py

import pyttsx3
from pydub import AudioSegment
import re


def engine_settings(engine, article_language):
    """Set the speech rate and choose a voice for the article's language.

    A voice whose ``languages`` equals ``article_language`` and whose
    ``gender`` is ``'VoiceGenderMale'`` is preferred. If the language has no
    such voice, the first voice matching the language is used, so the text is
    still read in the right language. If no voice matches the language at
    all, the engine's current voice is left unchanged.

    Args:
        engine: Speech engine exposing ``getProperty`` and ``setProperty``.
        article_language: Value compared for equality with each voice's
            ``languages`` attribute, e.g. ``['en_GB']``.
    """
    voices = engine.getProperty('voices')
    engine.setProperty('rate', 185)
    # NOTE(review): the 'VoiceGenderMale' value and the format of
    # ``voice.languages`` look driver-specific; confirm on the target
    # platform.
    matching = [voice for voice in voices
                if voice.languages == article_language]
    if not matching:
        return
    chosen = next(
        (voice for voice in matching if voice.gender == 'VoiceGenderMale'),
        matching[0],
    )
    engine.setProperty('voice', chosen.id)


def get_mp3_file(file_name, article_text, article_language):
    """Synthesize ``article_text`` into ``file_name`` and convert it to mp3.

    Args:
        file_name: Path of the audio file to create.
        article_text: Text to be read aloud.
        article_language: Language value passed to ``engine_settings`` to
            choose the voice.
    """
    tts = pyttsx3.init()
    engine_settings(tts, article_language)
    tts.save_to_file(article_text, file_name)
    # The conversion below must only start once runAndWait has returned.
    tts.runAndWait()
    convert_file_to_mp3(file_name)


def convert_file_to_mp3(file_name):
    """Re-encode the audio file at ``file_name`` as mp3, in place.

    Args:
        file_name: Path of the audio file; it is read and then overwritten
            with the mp3 export.
    """
    audio = AudioSegment.from_file(file_name)
    audio.export(file_name, format="mp3")


def get_file_name(link):
    """Build a filesystem-safe ``.mp3`` file name from an article URL.

    The ``http://`` / ``https://`` prefix is removed and every character that
    is not a letter, digit or underscore is replaced with ``'_'``. This
    covers ``/``, ``.`` and ``-`` as well as query-string characters such as
    ``?``, ``=``, ``&`` and ``:``, which are not valid in file names on every
    OS.

    Args:
        link: Article URL; a link without a scheme is accepted as is.

    Returns:
        The sanitized name with ``'.mp3'`` appended.
    """
    # Substituting instead of splitting means a link without a scheme no
    # longer raises IndexError.
    name = re.sub(r'^https?:\/\/?', '', link, count=1)
    name = re.sub(r'\W', '_', name)
    return name + '.mp3'
      
      



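La synthèse vocale est réalisée avec pyttsx3, et la conversion en mp3 avec pydub. La fonction engine_settings règle la vitesse de lecture et choisit une voix masculine correspondant à la langue de l'article.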





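La fonction get_mp3_file appelle engine_settings, puis enregistre la lecture du texte dans un fichier (get_file_name lui donne déjà l'extension mp3 ; le fichier écrit par pyttsx3 est ensuite converti avec AudioSegment). Une fois engine.runAndWait terminé, le fichier est converti en mp3.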





. .





python bot.py
      
      



, , <article> pyttsx3 , . .








All Articles