Python MySQL example: getListEps (snippet)

Article Outline

Python mysql example 'getListEps'

Functions in program:

  • def getListEps(url):

Modules used in program:

  • import mysql.connector
  • import base64
  • import re
  • import requests
  • from bs4 import BeautifulSoup
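
The snippet assumes a MySQL database anime_seasons with a list_eps table that is never created in the code itself. As a minimal sketch, the following setup script would give getListEps a compatible table to write into; the column names come from the INSERT statement in the code below, but every column type here is an assumption:

import mysql.connector

# Hypothetical setup script: the column names match the INSERT in
# getListEps, but all of the types are assumptions
connect = mysql.connector.connect(host="localhost", user="amusfq", passwd="1412")
cursor = connect.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS anime_seasons")
cursor.execute("USE anime_seasons")
cursor.execute("""
    CREATE TABLE IF NOT EXISTS list_eps (
        id INT AUTO_INCREMENT PRIMARY KEY,
        id_anime VARCHAR(16),
        eps VARCHAR(16),
        url TEXT,
        gd TEXT,
        img TEXT,
        isDownloaded TINYINT(1) DEFAULT 0
    )
""")
connect.close()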

Python mysql example: getListEps

from bs4 import BeautifulSoup
import requests
import re
import base64
import mysql.connector

# Database id of the anime being scraped (hard-coded in this snippet)
id_anime = "4"

def getListEps(url):
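    """Scrape the episode list at `url`, resolve each episode's Google Drive
    link through the safelink chain, and insert new rows into list_eps."""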
    # Request the anime's main page, which holds the episode list
    content = requests.get(url)
    soup = BeautifulSoup(content.text, 'html.parser')
    # Collect every episode anchor from the episode list
    result = soup.find('div', 'episodelist').find("ul").findChildren("a")
    link = []
    eps = []
    list_link = []
    for child in result:
        # Append the link and the episode number (the last '-' token of the href)
        eps.append(child['href'].split('-')[-1].replace("/", ""))
        link.append(child['href'])
    # Drop duplicate links and episode numbers (dict.fromkeys preserves order)
    link = list(dict.fromkeys(link))
    eps = list(dict.fromkeys(eps))
    # Pair each episode number with its link
    for i in range(len(eps)):
        list_link.append([eps[i], link[i]])
    # One database connection and cursor for the whole run
    connect = mysql.connector.connect(host="localhost", user="amusfq", passwd="1412", database="anime_seasons")
    cursor = connect.cursor()
    # Resolve the Google Drive link for each episode
    for eps, link in list_link:
        query = "SELECT * FROM list_eps WHERE eps='{}'".format(eps)
        cursor.execute(query)
        result = cursor.fetchall()
        # Mengecek data sudah ada di db atau tidak
        # Kalau ada skip
        if len(result) < 1:
            # Open the episode page whose download links we want
            content = requests.get(link)
            soup = BeautifulSoup(content.text, 'html.parser')
            # Keep a handle on the episode page; 'soup' is reused for the
            # safelink pages below
            eps_page = soup
            try:
                # Grab the "GD2" anchors (the samehadaku base links);
                # if the div is missing this raises and gd stays empty
                result = soup.find('div', 'download-eps').find_all("a", text="GD2")
                try:
                    # Try to bypass the safelink chain; if any step fails,
                    # the Google Drive link stays empty
                    gd = result[-1]['href']
                    # Assumption: the ?id= value is the token the safelink
                    # form expects as 'eastsafelink_id'
                    b64 = gd.replace("https://anjay.info?id=", "")
                    safelink_url = "https://www.anjay.info/spatial-coexistence/"
                    # Access the first safelink page
                    content = requests.get(gd)
                    soup = BeautifulSoup(content.text, 'html.parser')
                    post = {'eastsafelink_id': b64}
                    # Submit the token to the second safelink endpoint
                    x = requests.post(safelink_url, data=post)
                    soup = BeautifulSoup(x.text, 'html.parser')
                    # The response embeds an intermediate ahexa URL; extract it
                    regex = r'(?:https\:\/\/www\.ahexa\.com\/njir\/.*.\')'
                    z = re.findall(regex, soup.find(string=re.compile("ahexa")))
                    z = z[0][:-1]  # drop the trailing quote
                    # Access the third safelink page
                    x = requests.get(z)
                    soup = BeautifulSoup(x.text, 'html.parser')
                    regex = r"(?:aHR0cHM.*.\==\")"
                    z = re.search(regex, soup.prettify())
                    # Two different regexes, depending on how the page embeds the link
                    if z is not None:
                        z = z.group()[:-1]
                        z = base64.b64decode(z).decode("utf-8")
                        print(z)
                        regex = r"(?:d\/.*\/)"
                        z = re.search(regex, z).group()[2:-1]
                    else:
                        regex = r"(?:open\?id\=.*\")"
                        z = re.search(regex, soup.prettify()).group()[8:-1]
                    gd = z
                except Exception:
                    # couldn't get a link T.T
                    gd = ""
            except Exception:
                # couldn't get a link T.T
                gd = ""
            # Thumbnail: base64-encode the second <img> src on the episode page
            result = eps_page.find_all("img")
            img = base64.b64encode(result[1]['src'].encode("utf-8")).decode("utf-8")
            # Store everything collected for this episode:
            # id_anime, eps, episode url, google drive link, thumbnail
            query = "INSERT INTO list_eps (id_anime, eps, url, gd, img, isDownloaded) VALUES ('{}', '{}', '{}', '{}', '{}', '0')".format(id_anime, eps, link, gd, img)
            try:
                cursor.execute(query)
            except:
                print(query)
            if cursor.rowcount > 0:
                connect.commit()
                print("Eps {} added".format(eps))
            else:
                connect.rollback()
                print("Failed to add eps {}".format(eps))
    connect.close()

getListEps('https://www.samehadaku.tv/anime/black-clover/')
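
After a run, the rows can be read back with the same connection settings; a small usage sketch (the SELECT mirrors the columns assumed above, and gd is empty when the safelink bypass failed):

connect = mysql.connector.connect(host="localhost", user="amusfq", passwd="1412", database="anime_seasons")
cursor = connect.cursor()
cursor.execute("SELECT eps, url, gd FROM list_eps WHERE id_anime = %s", (id_anime,))
for eps, url, gd in cursor.fetchall():
    print(eps, url, gd if gd else "(no Drive link)")
connect.close()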