Implémentation du mode hors ligne pour Yandex.Music

introduction



Aujourd'hui, nous allons considérer un service de musique aussi connu que Yandex.Music. Bon service global, mais avec un inconvénient majeur - l'incapacité de travailler hors ligne. Nous essaierons de corriger ce malentendu ennuyeux en utilisant les outils disponibles.



Outils



Donc nous avons besoin:





Autorisation



Les utilisateurs non autorisés du service ne peuvent accéder qu'à des segments de chansons d'une durée maximale de 30 secondes. Ce n'est clairement pas suffisant pour une écoute de qualité. Nous nous connecterons de la manière la plus naturelle, via un formulaire Web et recevrons des cookies. Cela nous aidera ouvre pour faire des demandes et HTMLParser des formes analyse syntaxique.



def resolve_cookie(login: str, password: str) -> str:
    cookies = CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookies),
        urllib.request.HTTPRedirectHandler())
    response = opener.open("https://passport.yandex.ru")
    doc = response.read()
    parser = FormParser()
    parser.feed(doc.decode("utf-8"))
    parser.close()
    parser.params["login"] = login
    response = opener.open(parser.url or response.url, urllib.parse.urlencode(parser.params).encode("utf-8"))
    doc = response.read()
    parser = FormParser()
    parser.feed(doc.decode("utf-8"))
    parser.close()
    parser.params["login"] = login
    parser.params["passwd"] = password
    response = opener.open(parser.url or response.url, urllib.parse.urlencode(parser.params).encode("utf-8"))
    cookie_data = {}
    for item in cookies:
        if item.domain == ".yandex.ru":
            cookie_data[item.name] = item.value
    if "yandex_login" not in cookie_data:
        keys = ", ".join(cookie_data.keys())
        raise Exception(f"Invalid cookie_data {keys}")
    return "; ".join(map(lambda v: f"{v[0]}={v[1]}", cookie_data.items()))


https://passport.yandex.ru login. , , . . — . yandex_login, . .



Yandex Music (HTML) API



. , aiohttp. html BeautifulSoup. , , -.



class YandexMusicApi:
    host = "music.yandex.ru"
    base_url = f"https://{host}"

    def __init__(self, cookie: str):
        self.headers = Headers(self.host, cookie)

    async def _request(self, end_point: str):
        async with aiohttp.ClientSession() as session:
            url = f"{self.base_url}/{end_point}"
            async with session.request(method="GET", url=url) as response:
                return await response.read()

    async def get_favorite_artists(self, login: str) -> List[Artist]:
        body = await self._request(f"users/{login}/artists")
        soup = BeautifulSoup(body, "lxml")
        artists_soup = soup.find("div", class_="page-users__artists")
        if artists_soup is None:
            caption = soup.find("div", class_="page-users__caption")
            if caption:
                raise Exception(caption.contents[0])
        result = []
        for artist_soup in artists_soup.find_all("div", class_="artist"):
            title_soup = artist_soup.find("div", class_="artist__name")
            title = title_soup.attrs["title"]
            title_href_soup = title_soup.find("a")
            id_ = int(title_href_soup.attrs["href"].split("/")[-1])
            result.append(Artist(id_, title))
        return result


, https://music.yandex.ru/users/<login>/artists page-users__artists . title artist__name. Id split .

, .





. , . — yandex-. , . Network , https://{host}/get-mp3/{sign}/{ts}/{path}, sign. (XGRlBW9FXlekgbPrRHuSiA) . , .



    async def get_track_url(self, album_id: int, track_id: int) -> str:
        async with aiohttp.ClientSession() as session:
            url = f"{self.base_url}/api/v2.1/handlers/track/{track_id}:{album_id}/" \
                  f"web-album-track-track-main/download/m?hq=0&external-domain={self.host}&overembed=no&__t={timestamp()}"
            page = f"album/{album_id}"
            headers = self.headers.build(page)
            async with session.request(method="GET", url=url, headers=headers) as response:
                body = await response.json()
                src = body["src"]
                src += f"&format=json&external-domain={self.host}&overembed=no&__t={timestamp()}"
                result = parse.urlparse(src)
                headers = self.headers.build(page, {
                    ":authority": "storage.mds.yandex.net",
                    ":method": "GET",
                    ":path": f"{result.path}/{result.query}",
                    ":scheme": "https",
                }, True)
                async with session.request(method="GET", url=src, headers=headers) as response:
                    body = await response.json()
                    host = body["host"]
                    path = body["path"]
                    s = body["s"]
                    ts = body["ts"]
                    sign = md5(f"XGRlBW9FXlekgbPrRHuSiA{path[1::]}{s}".encode("utf-8")).hexdigest()
                    url = f"https://{host}/get-mp3/{sign}/{ts}/{path}"
                    return url


, .





, aiohttp, — aiofile .



    async def download_file(cls, url: str, filename: str):
        async with aiohttp.ClientSession() as session:
            async with session.request(method="GET", url=url) as response:
                data = await response.read()
                async with AIOFile(filename, "wb") as afp:
                    await afp.write(data)
                    await afp.fsync()


. , , mp3, , , . , , . , . .





, . , , . , , . (- mp3 ).



    async def download_artist(self, artist: Artist, depth: Depth = Depth.NORMAL):
        artist_progress = tqdm(total=0, desc=artist.title, position=1, ascii=True)
        albums = await self.api.get_artist_albums(artist.id)
        artist_progress.total = len(albums)
        artist_progress.refresh()
        for album in albums:
            album_dir = os.path.join(self.target_dir, normalize(artist.title), f"{album.year} - {normalize(album.title)}")
            if depth < Depth.ALBUMS and os.path.exists(album_dir):
                artist_progress.update()
                continue
            album_progress = tqdm(total=0, desc=f"> {album.title}", position=0, ascii=True)
            tracks = await self.api.get_album_tracks(album.id)
            album_progress.total = len(tracks)
            album_progress.refresh()
            os.makedirs(album_dir, exist_ok=True)
            if album.cover:
                album_progress.total += 1
                cover_filename = os.path.join(album_dir, "cover.jpg")
                if not os.path.exists(cover_filename):
                    await self.download_file(album.cover, cover_filename)
                album_progress.update()
            for track in tracks:
                target_filename = os.path.join(album_dir, f"{track.num:02d}. {normalize(track.title)}.mp3")
                if depth >= Depth.TRACKS or not os.path.exists(target_filename):
                    url = await self.api.get_track_url(track.album_id, track.id)
                    await self.download_file(url, target_filename)
                    self.write_tags(target_filename, {
                        "title": track.title,
                        "tracknumber": str(track.num),
                        "artist": artist.title,
                        "album": album.title,
                        "date": str(album.year),
                    })
                album_progress.update()
            album_progress.close()
            artist_progress.update()
        artist_progress.close()


, , AC/DC . normalize:



def normalize(name: str) -> str:
    return name.replace("/", "-")


, ( ) . . asyncio.Semaphore asyncio.gather.

, .





, , , . .credentials, . .cookie .



def resolve_cookie() -> str:
    base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
    cookie_file = os.path.join(base_dir, ".cookie")
    if os.path.exists(cookie_file):
        with open(cookie_file, "rt") as file:
            return file.read()
    credentials_file = os.path.join(base_dir, ".credentials")
    if os.path.exists(credentials_file):
        config = configparser.ConfigParser()
        config.read(credentials_file)
        login = config["yandex"]["login"]
        password = config["yandex"]["password"]
    else:
        raise Exception(f"""Create \"{credentials_file}\" with content

[yandex]
login=<user_login>
password=<user_password>
""")
    cookie = auth.resolve_cookie(login, password)
    with open(cookie_file, "wt") as file:
        file.write(cookie)
    return cookie


, , . argparse, .

:



  • -a (--artist), Id , ,
  • -o (--output), , — Music .
  • -d (--depth), ,

    • - 0 (NORMAL) , , ,
    • Value 1 (ALBUMS)parcourt toutes les pistes de l'album et télécharge celles qui manquent
    • La valeur 2 (TRACKS)télécharge et écrase les pistes même si elles sont déjà présentes dans le système de fichiers


async def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--artist", help="Artist ID")
    parser.add_argument("-o", "--output", default=f"{Path.home()}/Music",
                        help=f"Output directory, default {Path.home()}/Music")
    parser.add_argument("-d", "--depth", default=0, type=int,
                        help=f"Exists files check depth, {enum_print(Depth)}")
    args = parser.parse_args()
    cookie = resolve_cookie()
    api = YandexMusicApi(cookie)
    agent = YandexMusicAgent(api, args.output)
    if args.artist:
        artist = await api.get_artist(args.artist)
        await agent.download_artist(artist, args.depth)
    else:
        email = re.compile(".*?yandex_login=(.*?);.*?", re.M).match(cookie).group(1)
        await agent.download_favorites(email, args.depth)


Et maintenant, enfin, nous pouvons tout exécuter:



if __name__ == "__main__":
    asyncio.run(main())


Merci de votre attention. Vous savez maintenant comment implémenter une API qui n'existe pas, téléchargez celle non téléchargeable et devenez l'heureux propriétaire de votre propre collection musicale.



Le résultat peut être vu dans le référentiel yandex.music.agent




All Articles