# -*- coding: utf-8 -*-
import scrapy
from bs4 import BeautifulSoup
import re
import datetime

class MycoursespiderSpider(scrapy.Spider):
    """Download today's course videos from the zt.gxtv.cn course portal.

    Crawl flow:

    1. ``parse`` scans the grade-5 column (``ul#g5``) for every lesson dated
       today, and the grade-4 column (``ul#g4``) for today's English lesson
       (rural grade-5 classes follow the grade-4 English course).
    2. ``parseinside`` visits each lesson page, extracts the direct MP4 link,
       and rewrites a ``<month>-<day>.txt`` index of ``title:url`` lines.
    3. ``parsevideo`` saves the downloaded video as ``<title>.mp4``.

    All output files are written to the current working directory.
    """

    name = 'mycoursespider'
    start_urls = ['http://zt.gxtv.cn/zt/default.html']

    # Direct MP4 link on the video CDN. Dots are escaped and the match is
    # non-greedy and stops at quotes/whitespace/angle brackets, so a line
    # containing several links does not get swallowed as one "URL".
    _VIDEO_URL_RE = re.compile(r'http://video\.cdn\.liangtv\.cn[^\s"\'<>]*?\.mp4')

    # Characters that are illegal or troublesome in file names.
    _UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|\r\n\t]+')

    def __init__(self, *args, **kwargs):
        super(MycoursespiderSpider, self).__init__(*args, **kwargs)
        # Maps lesson title -> URL. Entries start out as lesson-page URLs
        # (filled in by ``parse``) and are replaced/extended with direct
        # video URLs as ``parseinside`` resolves them.
        self._links = {}

    @staticmethod
    def _date_pattern(today, suffix=''):
        """Return a compiled pattern matching ``<month>月<day>日<suffix>``.

        The look-behind rejects a preceding digit so that, for example,
        ``1月5日`` does not match inside ``11月5日``.
        """
        label = '{}月{}日{}'.format(today.month, today.day, suffix)
        return re.compile(r'(?<!\d)' + re.escape(label))

    def _write_index(self):
        """Rewrite today's ``<month>-<day>.txt`` index, one ``title:url`` per line."""
        today = datetime.date.today()
        index_name = '{}-{}.txt'.format(today.month, today.day)
        # Titles are Chinese; force UTF-8 rather than the platform default.
        with open(index_name, 'w', encoding='utf-8') as index_file:
            index_file.writelines(
                '{}:{}\n'.format(title, url)
                for title, url in self._links.items()
            )

    def parse(self, response):
        """Collect today's lesson pages from the portal and request each one.

        Yields:
            scrapy.Request: one request per matching lesson page, handled by
            ``parseinside``.
        """
        today = datetime.date.today()
        # (CSS selector for the column's links, title pattern to look for).
        # Grade 4 is only checked for today's English lesson; grade 5 is
        # checked for every lesson dated today.
        columns = (
            ('ul#g4 a[target=_blank]', self._date_pattern(today, '-英语')),
            ('ul#g5 a[target=_blank]', self._date_pattern(today)),
        )
        for selector, wanted in columns:
            for anchor in response.css(selector):
                title = anchor.xpath('@title').extract_first()
                href = anchor.xpath('@href').extract_first()
                if not title or not href:
                    # Skip anchors that lack the attributes we need.
                    continue
                if wanted.search(title):
                    # urljoin handles root-relative, relative and absolute hrefs.
                    self._links[title] = response.urljoin(href)

        # Iterate over a snapshot: ``parseinside`` may add entries to
        # ``self._links`` while this generator is still being consumed.
        for page_url in tuple(self._links.values()):
            yield scrapy.Request(page_url, callback=self.parseinside)

    def parseinside(self, response):
        """Extract the video link from a lesson page and request the video.

        The ``title:url`` index file is rewritten on every call. If the page
        contains no video link a warning is logged and nothing is requested.

        Yields:
            scrapy.Request: the video download, handled by ``parsevideo``;
            the lesson title is passed along in ``meta['title']``.
        """
        title = (response.css('h3#title::text').extract_first() or '').strip()
        if not title:
            # No heading on the page: fall back to the page URL as the key.
            title = response.url

        match = self._VIDEO_URL_RE.search(response.text)
        if match is not None:
            self._links[title] = match.group(0)
        self._write_index()

        if match is None:
            self.logger.warning('No video link found on %s', response.url)
            return

        # ``meta`` carries the title through to the download callback.
        yield scrapy.Request(
            match.group(0),
            callback=self.parsevideo,
            meta={'title': title},
        )

    def parsevideo(self, response):
        """Save the downloaded video body to ``<title>.mp4``."""
        title = response.meta.get('title') or response.url.rsplit('/', 1)[-1]
        # The title comes from page content; strip path separators and other
        # characters that cannot appear in a file name.
        filename = self._UNSAFE_FILENAME_RE.sub('_', title)
        if not filename.lower().endswith('.mp4'):
            filename += '.mp4'
        # NOTE: the whole response body is held in memory by Scrapy before
        # this callback runs; very large videos are subject to the
        # DOWNLOAD_MAXSIZE / DOWNLOAD_WARNSIZE settings.
        with open(filename, 'wb') as video_file:
            video_file.write(response.body)

    # def parsecoursetable(self, response):         # Attempt to scrape the course timetable; Splash never returned the dynamically rendered page, so this is disabled
    #     resp = response.text
    #     resp = '' + resp
    #     resp.encode('utf-8')
    #     print(resp)
    #     with open('download.html', 'w+') as f:
    #         f.write(response.text)


