首页 > 其他分享 > 寒假自学(十一)

寒假自学(十一)

2021-01-15 23:35:44 作者：互联网

希望所有温柔又可爱的人最后都能幸福❤

今日总结：

代码量	400行
博客量	一篇
所学时间	6小时左右
了解到的知识点	Python爬取疫情信息、AcWing每日一题

明日计划：

早上	python数据分析入门
下午	python数据分析入门
晚上	Acwing每日一题

具体内容：
采集最近一日的疫情数据

import requests
from bs4 import BeautifulSoup
import re
import json

# Download the DXY epidemic overview page; bytes are decoded as UTF-8 (the default).
page_html = requests.get("http://ncov.dxy.cn/ncovh5/view/pneumonia").content.decode()

# The data is embedded in the <script> tag carrying this id.
country_script = BeautifulSoup(page_html, 'lxml').find(id="getListByCountryTypeService2true")

# Greedy match from the first '[' to the last ']' on a line: the JSON array literal.
array_literals = re.findall(r'\[.+\]', country_script.string)

last_day_corona_virus = json.loads(array_literals[0])
print(last_day_corona_virus)

# ensure_ascii=False keeps non-ASCII characters readable in the output file.
with open('last_day_corona_virus.json', mode='w', encoding='utf-8') as out_file:
    json.dump(last_day_corona_virus, out_file, ensure_ascii=False)

改造:

import requests
from bs4 import BeautifulSoup
import re
import json


class CoronaVirusSpider(object):
    """Crawler that stores the latest per-country epidemic data from the DXY page."""

    def __init__(self):
        # Page whose embedded <script> tags carry the epidemic data.
        self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

    def get_content_from_url(self, url):
        """
        Request ``url`` and return the response body as a string.

        :param url: address to request
        :return: response bytes decoded with the default codec (UTF-8)
        """
        raw_bytes = requests.get(url).content
        return raw_bytes.decode()

    def parse_home_page(self, home_page):
        """
        Extract the data embedded in the home page.

        :param home_page: HTML text of the home page
        :return: Python object decoded from the JSON array found in the
            ``getListByCountryTypeService2true`` script tag
        """
        document = BeautifulSoup(home_page, 'lxml')
        script_text = document.find(id="getListByCountryTypeService2true").string
        # Greedy match from the first '[' to the last ']' on a line.
        array_literal = re.findall(r'\[.+\]', script_text)[0]
        return json.loads(array_literal)

    def save(self, data, path):
        """
        Write ``data`` as JSON to the file at ``path``.

        :param data: JSON-serializable object
        :param path: destination file; opened in write mode, so it is overwritten
        """
        with open(path, mode='w', encoding='utf-8') as out_file:
            # ensure_ascii=False writes non-ASCII characters unescaped.
            json.dump(data, out_file, ensure_ascii=False)

    def crawl_last_day_corona_virus(self):
        """
        Fetch, parse and store the most recent day's data for all countries.

        The result is written to ``last_day_corona_virus.json``.
        """
        countries = self.parse_home_page(self.get_content_from_url(self.home_url))
        self.save(countries, 'last_day_corona_virus.json')

    def run(self):
        """Entry point: perform the single crawl step."""
        self.crawl_last_day_corona_virus()


if __name__ == '__main__':
    # Crawl only when executed as a script, not when imported.
    CoronaVirusSpider().run()

采集从1月23号以来各国疫情数据

import requests
from bs4 import BeautifulSoup
import re
import json
from tqdm import tqdm

class CoronaVirusSpider(object):
    """Crawl the DXY epidemic page: latest per-country data and per-country history."""

    # Seconds to wait on a request before giving up; without a timeout
    # requests.get() can block indefinitely on a stalled connection.
    request_timeout = 30

    def __init__(self):
        # Page whose embedded <script> tags carry the epidemic data.
        self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

    def get_content_from_url(self, url):
        """
        Request ``url`` and return the response body as a string.

        :param url: address to request
        :return: response bytes decoded with the default codec (UTF-8)
        :raises requests.RequestException: on timeout, connection failure
            or a 4xx/5xx status code
        """
        response = requests.get(url, timeout=self.request_timeout)
        # Fail here with the HTTP status instead of later while parsing an error page.
        response.raise_for_status()
        return response.content.decode()

    def parse_home_page(self, home_page):
        """
        Extract the data embedded in the home page.

        :param home_page: HTML text of the home page
        :return: Python object decoded from the JSON array found in the
            ``getListByCountryTypeService2true`` script tag
        """
        soup = BeautifulSoup(home_page, 'lxml')
        script = soup.find(id="getListByCountryTypeService2true")
        text = script.string

        # Greedy match from the first '[' to the last ']' on a line.
        json_str = re.findall(r'\[.+\]', text)[0]
        return json.loads(json_str)

    def save(self, data, path):
        """
        Write ``data`` as JSON to the file at ``path``.

        :param data: JSON-serializable object
        :param path: destination file; opened in write mode, so it is overwritten
        """
        with open(path, 'w', encoding='utf-8') as fp:
            # ensure_ascii=False writes non-ASCII characters unescaped.
            json.dump(data, fp, ensure_ascii=False)

    def crawl_last_day_corona_virus(self):
        """
        Fetch, parse and store the most recent day's data for all countries.

        The result is written to ``last_day_corona_virus.json``.
        """
        home_page = self.get_content_from_url(self.home_url)
        last_day_corona_virus = self.parse_home_page(home_page)
        self.save(last_day_corona_virus, 'last_day_corona_virus.json')

    def _load_last_day_corona_virus(self):
        """Return the saved per-country data, crawling it first if the file is missing."""
        path = 'last_day_corona_virus.json'
        try:
            with open(path, encoding='utf-8') as fp:
                return json.load(fp)
        except FileNotFoundError:
            # run() does not refresh this file, so create it on demand
            # instead of crashing on a first run.
            self.crawl_last_day_corona_virus()
        with open(path, encoding='utf-8') as fp:
            return json.load(fp)

    def crawl_corona_virus(self):
        """
        Collect every country's daily figures since January 23.

        For each entry of the saved per-country data, the URL under its
        ``statisticsData`` key is fetched, and every record of the returned
        ``data`` list is tagged with the entry's ``provinceName`` and
        ``countryShortCode``. All records are written to ``corona_virus.json``.
        """
        last_day_corona_virus = self._load_last_day_corona_virus()
        corona_virus = []
        for country in tqdm(last_day_corona_virus, '采集1月23日以来各国疫情信息'):
            statistic_data_json_str = self.get_content_from_url(country['statisticsData'])
            statistic_data = json.loads(statistic_data_json_str)['data']
            for one_day in statistic_data:
                # The daily records do not say which country they belong to.
                one_day['provinceName'] = country['provinceName']
                one_day['countryShortCode'] = country['countryShortCode']
            corona_virus.extend(statistic_data)
        self.save(corona_virus, 'corona_virus.json')

    def run(self):
        """Entry point: crawl the historical data for all countries."""
        # The per-country file from an earlier run is reused when present;
        # crawl_corona_virus() fetches it itself when it is missing.
        self.crawl_corona_virus()


if __name__ == '__main__':
    # Crawl only when executed as a script, not when imported.
    CoronaVirusSpider().run()

疫情爬虫项目

import requests
from bs4 import BeautifulSoup
import re
import json
from tqdm import tqdm


class CoronaVirusSpider(object):
    """Crawl the DXY epidemic page and store the extracted data under ``data/``."""

    # Seconds to wait on a request before giving up; without a timeout
    # requests.get() can block indefinitely on a stalled connection.
    request_timeout = 30

    # Single source for the per-country file so that the method writing it
    # and the method reading it cannot drift apart.
    last_day_corona_virus_path = 'data/last_day_corona_virus.json'

    def __init__(self):
        # Page whose embedded <script> tags carry the epidemic data.
        self.home_url = "http://ncov.dxy.cn/ncovh5/view/pneumonia"

    def get_content_from_url(self, url):
        """
        Request ``url`` and return the response body as a string.

        :param url: address to request
        :return: response bytes decoded with the default codec (UTF-8)
        :raises requests.RequestException: on timeout, connection failure
            or a 4xx/5xx status code
        """
        response = requests.get(url, timeout=self.request_timeout)
        # Fail here with the HTTP status instead of later while parsing an error page.
        response.raise_for_status()
        return response.content.decode()

    def parse_home_page(self, home_page, tag_id):
        """
        Extract the data embedded in one tag of the home page.

        :param home_page: HTML text of the home page
        :param tag_id: ``id`` attribute of the tag whose text holds the JSON array
        :return: Python object decoded from that JSON array
        """
        soup = BeautifulSoup(home_page, 'lxml')
        script = soup.find(id=tag_id)
        text = script.string

        # Greedy match from the first '[' to the last ']' on a line.
        json_str = re.findall(r'\[.+\]', text)[0]
        return json.loads(json_str)

    def save(self, data, path):
        """
        Write ``data`` as JSON to the file at ``path``.

        Missing parent directories are created first, so saving into
        ``data/`` works on a fresh checkout.

        :param data: JSON-serializable object
        :param path: destination file; opened in write mode, so it is overwritten
        """
        import os  # local import: only this method needs it

        parent = os.path.dirname(path)
        if parent:  # a bare file name has no directory to create
            os.makedirs(parent, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as fp:
            # ensure_ascii=False writes non-ASCII characters unescaped.
            json.dump(data, fp, ensure_ascii=False)

    def crawl_last_day_corona_virus(self):
        """
        Fetch, parse and store the most recent day's data for all countries.

        The result is written to ``data/last_day_corona_virus.json``.
        """
        home_page = self.get_content_from_url(self.home_url)
        last_day_corona_virus = self.parse_home_page(
            home_page, tag_id="getListByCountryTypeService2true")
        self.save(last_day_corona_virus, self.last_day_corona_virus_path)

    def crawl_corona_virus(self):
        """
        Collect every country's daily figures since January 23.

        Reads the file written by ``crawl_last_day_corona_virus``. For each
        entry, the URL under its ``statisticsData`` key is fetched, and every
        record of the returned ``data`` list is tagged with the entry's
        ``provinceName`` and ``countryShortCode``. All records are written to
        ``data/corona_virus.json``.
        """
        # Read from the same path crawl_last_day_corona_virus() writes to.
        with open(self.last_day_corona_virus_path, encoding='utf-8') as fp:
            last_day_corona_virus = json.load(fp)
        corona_virus = []
        for country in tqdm(last_day_corona_virus, '采集1月23日以来各国疫情信息'):
            statistic_data_json_str = self.get_content_from_url(country['statisticsData'])
            statistic_data = json.loads(statistic_data_json_str)['data']
            for one_day in statistic_data:
                # The daily records do not say which country they belong to.
                one_day['provinceName'] = country['provinceName']
                one_day['countryShortCode'] = country['countryShortCode']
            corona_virus.extend(statistic_data)
        self.save(corona_virus, 'data/corona_virus.json')

    def crawl_last_day_corona_virus_of_china(self):
        """
        Fetch, parse and store the most recent day's data for each province.

        The data comes from the ``getAreaStat`` tag of the home page and is
        written to ``data/last_day_corona_virus_of_china.json``.
        """
        home_page = self.get_content_from_url(self.home_url)
        data = self.parse_home_page(home_page, tag_id='getAreaStat')
        self.save(data, 'data/last_day_corona_virus_of_china.json')

    def run(self):
        """Entry point: crawl the latest per-country and per-province data."""
        self.crawl_last_day_corona_virus()
        # The historical crawl (self.crawl_corona_virus()) is not part of the
        # default run.
        self.crawl_last_day_corona_virus_of_china()


if __name__ == '__main__':
    # Crawl only when executed as a script, not when imported.
    CoronaVirusSpider().run()

了解可视化

https://www.acwing.com/activity/content/problem/content/3416/1/

从起点 '@' 出发直接用 BFS 搜索，统计能够到达的格子数（包含起点）即可

#include <bits/stdc++.h>
using namespace std;
// a[i][j] == 1 marks a '.' cell of the current grid (indices start at 1).
int a[25][25];
// visited[i][j] == 1 once bfs() has queued that cell.
int visited[25][25];
// Row and column offsets of the four neighbouring cells.
int dx[4] = {1, -1, 0, 0};
int dy[4] = {0, 0, 1, -1};
// Grid size: n rows, m columns.
int n, m;
// Row and column of the '@' start cell.
int sx, sy;
// A grid position: x indexes the row, y the column.
struct node
{
    int x;
    int y;
};
// Breadth-first search from the start cell (sx, sy) over cells with
// a[][] == 1; prints the number of reached cells, the start cell included.
void bfs()
{
    memset(visited, 0, sizeof visited);
    int res = 0; // cells reached besides the start cell
    queue<node> q;
    node st = {sx, sy};
    q.push(st);
    visited[st.x][st.y] = 1;
    while (!q.empty())
    {
        node z = q.front();
        q.pop();
        for (int i = 0; i < 4; i++)
        {
            node w;
            w.x = z.x + dx[i];
            w.y = z.y + dy[i];
            // Range check first, so the arrays are only ever indexed with
            // coordinates inside the current grid.
            if (w.x < 1 || w.x > n || w.y < 1 || w.y > m) continue;
            if (a[w.x][w.y] != 1 || visited[w.x][w.y] != 0) continue;
            // Mark on push so a cell is never queued twice.
            visited[w.x][w.y] = 1;
            q.push(w);
            res++;
        }
    }
    // +1 accounts for the start cell, which is not counted in res.
    cout << res + 1 << endl;
}
int main()
{
    char c;
    while(cin>>m>>n)
    {
        if (n + m == 0) break;
        memset(a,0,sizeof a);
        for (int i = 1; i <= n; i ++)
        {
            for (int j = 1; j <= m; j ++)
            {
                cin>>c;
                if (c == '.') a[i][j] = 1;
                if (c== '@')
                {
                    sx = i;
                    sy = j;
                }
            }
        }
        bfs();
    }
    
}

标签：十一,self,json,corona,virus,寒假,自学,data,day
来源： https://www.cnblogs.com/125418a/p/14280180.html