I'm using a script to get the newest information from a website; all the pages are HTML. My script checks every minute to see if there are any changes on the site. However, every time I detect a new change, the HTML page's Last-Modified value shows that the page was modified about 9 minutes earlier. I've set the right parameters to avoid caching, and the response status code is 200. Why do I always get the changes 9 minutes late? Isn't Last-Modified the time when the page was updated? My expectation is that I should get the change notification within 60 seconds, not after 9 minutes.
#!/usr/bin/env python
#-*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import io
import sys
import datetime
sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
from lxml import html
import xml
import json
import requests
import tkinter as tk
from tkinter import messagebox
import time
import winsound
import random
# Validator (ETag, or Last-Modified as a fallback) seen on the previous
# successful poll. Empty until the first poll completes.
lastetag = ""


def detectchange():
    """Poll the monitored page once and notify the user if it changed.

    The page's ``ETag`` response header (falling back to ``Last-Modified``
    when no ETag is sent) is compared with the value remembered in the
    module-level ``lastetag``. When it differs, a message box titled with
    the current local time is shown and ``lastetag`` is updated. When it is
    unchanged, or when the request fails, the function sleeps for 60 seconds
    before returning so the caller can simply invoke it in a loop.

    Because ``lastetag`` starts out empty, the very first successful poll
    always reports a change.
    """
    global lastetag

    url = "http://59.252.41.1/?nocache=true&max-age=0"
    headers = {
        'Cache-Control': 'no-store',
        'Pragma': 'no-cache',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'accept-encoding': 'gzip, deflate',
        'accept-language': 'zh-CN,zh;q=0.9',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    }

    try:
        # The context manager closes the session (and its pooled connection)
        # after every poll. The original `s.keep_alive = False` was a no-op:
        # Requests has no such setting.
        with requests.Session() as s:
            s.headers = headers
            # Without a timeout a stalled server would block the monitor
            # forever.
            req = s.get(url, verify=False, timeout=30)
            # Treat error pages as failures rather than as "the page changed".
            req.raise_for_status()
    except requests.RequestException as err:
        # A transient network/HTTP failure must not kill the polling loop.
        print("request failed: %s" % err)
        time.sleep(60)
        return

    # Header lookup on a Requests response is case-insensitive; .get()
    # avoids a KeyError when the server omits the header.
    last_modified = req.headers.get('last-modified')
    validator = req.headers.get('etag') or last_modified
    if validator is None:
        # Nothing to compare against; wait and try again on the next call.
        print("response carries neither ETag nor Last-Modified")
        time.sleep(60)
        return

    if validator != lastetag:
        now_time = datetime.datetime.now()
        timestring = datetime.datetime.strftime(now_time, '%H:%M:%S')
        # `message` was undefined in the original and raised NameError here.
        message = "Page changed: %s\nLast-Modified: %s" % (url, last_modified)
        messagebox.showinfo(timestring, message)
        lastetag = validator
    else:
        time.sleep(60)
if __name__ == '__main__':
    # Poll forever; detectchange() does its own waiting between polls.
    while True:
        detectchange()
Aucun commentaire:
Enregistrer un commentaire