数据库
首页 > 数据库 > PySpider 实现输入数学题目(latex)得到对应答案并存入MongoDB

PySpider 实现输入数学题目(latex)得到对应答案并存入MongoDB

作者:互联网

接上一篇

Python 实现输入积分题目(latex)得到对应答案并将结果存入json

对其进行改进

首先在 MongoDB 中创建如下结构的表
在这里插入图片描述

{
  "导数": [
    {
      "一阶导数": [
          {"题目": "答案"}
      ],
      
       "二阶导数": [
          {"题目": "答案"}
      ]
    }
  ],
  
  "积分": [
    {
      "定积分": [
          {"题目": "答案"}
      ],
      
       "不定积分": [
          {"题目": "答案"}
      ]
    }
  ]
}

使用普通的爬虫即可实现

import requests
import json
import matplotlib.pyplot as plt
import urllib

from lxml import etree
import pymongo
        
        
# One-off script: scrape the subject / question-type menu from Symbolab and
# store it in MongoDB as a single skeleton document. Each question type starts
# with one placeholder entry {"题目": "答案"}.

base_url = 'https://zs.symbolab.com/solver/derivative-calculator'

headers = {
    'authority': 'zs.symbolab.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  + 'Chrome/62.0.3202.94 Safari/537.36',
    'cookie': '',
}

# A timeout keeps the script from blocking forever if the server never answers.
res = requests.get(base_url, headers=headers, timeout=30)
html = etree.HTML(res.text)

# Every <li> directly under <ul class="m2u"> is treated as one subject.
ii_list = html.xpath('//ul[@class="m2u"]/li')
aa = {}
for i in ii_list:
    ke_mu = i.xpath('./a/text()')[0].strip()
    aa[ke_mu] = []
    ti_xing = i.xpath('./ul/li')
    c = {}
    try:
        for j in ti_xing:
            ti_xing_ = j.xpath('./a/text()')[0].strip()
            c[ti_xing_] = [{"题目": "答案"}]
    except IndexError:
        # A nested entry has no link text. As before, this subject keeps an
        # empty list, but only this specific failure is tolerated now
        # instead of every possible exception.
        continue
    aa[ke_mu].append(c)
print(aa)


myclient = pymongo.MongoClient('mongodb://localhost:27017/')
mydb = myclient['Question_bank']  # question bank database
col = mydb['Knowledge_points']  # knowledge-point collection
# Collection.insert() was removed in PyMongo 4; insert_one() is the
# supported way to store a single document.
col.insert_one(aa)

在这里插入图片描述
在这里插入图片描述
接下来,爬取 题目 和 答案,并存入 MongoDB

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2021-02-03 08:46:07
# Project: daan

from pyspider.libs.base_handler import *
import json
import pymongo
import urllib

class Handler(BaseHandler):
    """pyspider handler: fetch the solution of one LaTeX problem and store it.

    The handler requests the Symbolab ``pub_api/steps`` endpoint for
    ``self.latex_math``, extracts the problem text and the final result from
    the JSON response, and pushes the pair into the ``Knowledge_points``
    collection of the ``Question_bank`` MongoDB database under
    ``self.ke_mu`` / ``self.ti_xing``.
    """

    headers = {
        'authority': 'zs.symbolab.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                      + 'Chrome/62.0.3202.94 Safari/537.36',
        'cookie': '',
    }

    crawl_config = {
        'headers': headers,
    }

    def __init__(self):
        """Open the MongoDB connection and build the request URL."""
        # Imported explicitly: a bare ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule has been loaded.
        import urllib.parse

        # Database configuration (MongoDB on the local machine).
        self.myclient = pymongo.MongoClient(host='localhost', port=27017)
        self.mydb = self.myclient['Question_bank']  # question bank database

        # Subject and question type under which the answer is filed.
        # NOTE(review): the sample query below is an integral but is filed
        # under 导数 / 一阶导数 — confirm this is intended.
        self.ke_mu = "导数"
        self.ti_xing = "一阶导数"

        # Raw string: ``\i`` is not a valid escape sequence, so the non-raw
        # form only worked by accident and triggers a warning on newer Pythons.
        self.latex_math = r'\int 5xdx'
        self.url_code_latex_math = urllib.parse.quote(self.latex_math)
        self.base_url = (
            'https://zs.symbolab.com/pub_api/steps?subscribed=false&language=zs&query='
            + self.url_code_latex_math
            + '&plotRequest=PlotOptional&page=calculus-calculator'
        )

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule the single API request; decorated to run every 24 * 60 minutes."""
        self.crawl(self.base_url, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract the problem and its answer from the API response.

        Returns:
            A dict with keys ``题目`` (the ``step_input`` field) and ``答案``
            (the ``entire_result`` field) of the first solution, or ``None``
            when the response contains no solutions.
        """
        payload = json.loads(response.text)
        solutions = payload.get('solutions') or []
        if not solutions:
            # Nothing to store; on_result() ignores a falsy result.
            print('no solution returned for', self.latex_math)
            return None

        first = solutions[0]
        return {
            "题目": first['step_input'],
            "答案": first['entire_result'],
        }

    def on_result(self, result):
        """Persist every non-empty result to MongoDB."""
        if result:
            self.save_to_mongo(result)

    def save_to_mongo(self, result):
        """Append the question/answer pair to MongoDB unless it already exists.

        Args:
            result: dict with keys ``题目`` (problem) and ``答案`` (answer),
                as returned by ``detail_page``.
        """
        timu = result['题目']
        daan = result['答案']
        an = {timu: daan}
        print(an)

        col = self.mydb['Knowledge_points']
        path = self.ke_mu + "." + self.ti_xing

        # Check whether this exact question/answer pair is already stored.
        # NOTE(review): the question text is used as a field name inside a
        # dotted path; a question containing "." would presumably be read as
        # nested fields — confirm against the stored data.
        count = col.count_documents({path + "." + timu: daan})
        if count != 0:
            print("数据已存在")
        else:
            # Locate the document via the placeholder entry {"题目": "答案"}
            # and push the new pair onto the matching question-type array.
            col.update_one(
                {path + "." + "题目": "答案"},
                {"$push": {self.ke_mu + ".$." + self.ti_xing: an}},
            )
            print('save to mongo', result)

在存入数据库之前,需要判断数据是否已经存在

最终效果

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

标签:latex,url,MongoDB,self,result,ti,print,import,PySpider
来源: https://blog.csdn.net/qq_42374697/article/details/113618263