Avatar r50 a6ce93fe35b158fd29ba0e8681c918c22117160e9586a56eee4ffbc20df9bda1
программирование

Парсер

Добавлено 30 окт 2021 в 14:32
import requests
from bs4 import BeautifulSoup
import csv


def get_html(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.RequestException: On a connection problem, a timeout, or
            an HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail fast on an error status instead of handing an error page to the
    # HTML parser, where it would surface as an unrelated lookup failure.
    response.raise_for_status()
    return response.text


def refind(s):
    """Return the leading count of a rating label with commas removed.

    Example: ``'1,806 total ratings'`` -> ``'1806'``.

    Args:
        s: Label text whose first whitespace-separated token is the count.

    Returns:
        The first token as a string without thousands separators, or an
        empty string when ``s`` is empty or contains only whitespace.
    """
    # Split on any run of whitespace so that leading spaces or newlines
    # (common in text pulled out of HTML) do not yield an empty first token.
    tokens = s.split()
    if not tokens:
        return ''
    return tokens[0].replace(',', '')


def write_csv(data, path='plugins.csv'):
    """Append one plugin record to a CSV file.

    Args:
        data: Mapping with the keys ``'name'``, ``'url'`` and ``'reviews'``.
        path: Destination file; it is created if it does not exist and
            appended to otherwise.

    Raises:
        KeyError: If ``data`` lacks one of the required keys.
    """
    row = (data['name'], data['url'], data['reviews'])
    # newline='' lets the csv module control line endings (otherwise blank
    # rows appear on Windows); explicit UTF-8 keeps non-ASCII names from
    # depending on the platform's default encoding.
    with open(path, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(row)


def get_date(html):
    """Extract plugin entries from page markup and append them to the CSV.

    Takes the fourth ``<section>`` of the document and, for every
    ``<article>`` inside it, writes the plugin name, link and rating count
    through ``write_csv``. Articles missing the heading link or the rating
    count are skipped.

    Args:
        html: Markup of the plugins page.

    Raises:
        IndexError: If the document has fewer than four ``<section>``
            elements.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Presumably the "popular plugins" listing, going by the original
    # variable name; the position is tied to the page layout.
    popular = soup.find_all('section')[3]

    for plugin in popular.find_all('article'):
        heading = plugin.find('h3')
        link = heading.find('a') if heading is not None else None
        rating_box = plugin.find('span', class_='rating-count')
        rating_link = rating_box.find('a') if rating_box is not None else None
        # Skip an incomplete card rather than abort the whole run with an
        # AttributeError on None after some rows were already written.
        if link is None or rating_link is None:
            continue

        write_csv({
            'name': heading.text.strip(),
            'url': link.get('href'),
            'reviews': refind(rating_link.text.strip()),
        })


def main():
    """Fetch the WordPress plugin directory page and pass it to get_date."""
    html = get_html('https://wordpress.org/plugins/')
    get_date(html)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
1b638600a8