昨日作业:自动登录抽屉新热榜
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Log in to https://dig.chouti.com/ automatically.
#
# NOTE(review): the chromedriver path is passed positionally, which is the
# Selenium 3 calling convention used throughout these notes.
driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')

try:
    # Maximize the browser window.
    driver.maximize_window()

    # The implicit wait applies to every later element lookup, so it is set
    # before the first page load.
    driver.implicitly_wait(10)
    driver.get('https://dig.chouti.com/')
    time.sleep(5)

    # 1. Click the login button.
    login_btn = driver.find_element(By.ID, 'login_btn')
    login_btn.click()
    time.sleep(2)

    # 2. Enter the user name (phone number).
    # NOTE(review): credentials are hard-coded; move them out of the source
    # (environment variables or a prompt) before sharing this script.
    phone = driver.find_element(By.CLASS_NAME, 'login-phone')
    phone.send_keys('15622792660')

    # 3. Enter the password.
    pwd = driver.find_element(By.CLASS_NAME, 'pwd-password-input')
    pwd.send_keys('kermit46709394')

    # 4. Submit the login form.
    login_submit = driver.find_element(By.CLASS_NAME, 'btn-large')
    login_submit.click()

    time.sleep(20)

# Catch any failure and print it.
except Exception as e:
    print(e)

finally:
    # quit() ends the whole WebDriver session (all windows plus the
    # chromedriver process); close() would only close the current window.
    driver.quit()
今日内容:
注意: selenium驱动的浏览器是干净的,没有任何缓存。
1、selenium剩余用法
2、selenium万能登录破解
3、selenium爬取京东商品信息
4、破解极验滑动验证码
Xpath语法:
今日作业:
1、总结课堂知识点,写博客
2、爬取京东商品信息
3、滑动验证(提高题)
1.selenium选择器之Xpath:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Demonstrates locating elements with XPath expressions.
driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')


try:
    # Implicit wait: set before the get request.
    driver.implicitly_wait(5)

    driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')

    # Explicit wait: written after the get request.
    # wait.until(...)

    # Structure of the sample page, as far as the lookups below rely on it:
    #
    #   html
    #     head   (title: "Example website")
    #     body
    #       div id="images"
    #         a href=...   "Name: My image 1"
    #         a href=...   "Name: My image 2"
    #         a href=...   "Name: My image 3"
    #         a href=...   "Name: My image 4"
    #         a href=...   "Name: My image 5"

    # Look elements up with XPath syntax.
    # "/" starts at the root node.
    html = driver.find_element(By.XPATH, '/html')
    # driver.find_element(By.XPATH, '/head') raises: head is not the root node.
    print(html.tag_name)

    # "//" matches nodes anywhere in the document; find_element returns the
    # first match.
    div = driver.find_element(By.XPATH, '//div')
    print(div.tag_name)

    # "@" selects by attribute.
    # Find the div whose id is "images".
    div = driver.find_element(By.XPATH, '//div[@id="images"]')
    print(div.tag_name)
    print(div.text)

    # Find the first <a> node.
    a = driver.find_element(By.XPATH, '//a')
    print(a.tag_name)

    # Find all <a> nodes.
    a_s = driver.find_elements(By.XPATH, '//a')
    print(a_s)

    # Read the href attribute of the first <a> node.
    # get_attribute returns the value of one attribute of the node.
    href = driver.find_element(By.XPATH, '//a').get_attribute('href')
    print(href)

finally:
    # quit() ends the WebDriver session; close() only closes the window.
    driver.quit()
2.selenium剩余操作:
# Remaining Selenium operations.
#
# Each demo below is a stand-alone script and is kept commented out, as in
# the original notes. Uncomment one section at a time to run it.

# ---------------------------------------------------------------------------
# Clicking and clearing
# ---------------------------------------------------------------------------
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.common.keys import Keys
# import time
#
# driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')
#
# try:
#     driver.implicitly_wait(10)
#     # 1. Send a request to JD.
#     driver.get('https://www.jd.com/')
#     # Find the search box and type 围城.
#     input_tag = driver.find_element(By.ID, 'key')
#     input_tag.send_keys('围城')
#     # Press Enter.
#     input_tag.send_keys(Keys.ENTER)
#     time.sleep(2)
#     # Find the search box again, clear it and type 墨菲定律.
#     input_tag = driver.find_element(By.ID, 'key')
#     input_tag.clear()
#     input_tag.send_keys('墨菲定律')
#     # Find the search button and click it.
#     button = driver.find_element(By.CLASS_NAME, 'button')
#     button.click()
#     time.sleep(10)
#
# finally:
#     driver.quit()


# ---------------------------------------------------------------------------
# Reading cookies (for reference)
# ---------------------------------------------------------------------------
# from selenium import webdriver
# import time
#
# driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')
#
# try:
#     driver.implicitly_wait(10)
#     driver.get('https://www.zhihu.com/explore')
#     print(driver.get_cookies())
#
#     time.sleep(10)
# finally:
#     driver.quit()

# ---------------------------------------------------------------------------
# Tabs
# ---------------------------------------------------------------------------
# Tab management: a new tab can be opened through JavaScript (window.open)
# or through keyboard shortcuts such as Ctrl+T; the JavaScript way is the
# most portable one.
# import time
# from selenium import webdriver
#
# browser = webdriver.Chrome()
# try:
#     browser.get('https://www.baidu.com')
#
#     # execute_script: run JavaScript code in the page.
#     # Show an alert:
#     # browser.execute_script('alert("tank")')
#     # Open a new browser tab.
#     browser.execute_script(
#         '''
#         window.open();
#         '''
#     )
#     time.sleep(1)
#     print(browser.window_handles)  # all open tabs
#     # Switch to the second tab.
#     # New spelling:
#     browser.switch_to.window(browser.window_handles[1])
#     # Old (deprecated) spelling:
#     # browser.switch_to_window(browser.window_handles[1])
#
#     # Load Taobao in the second tab.
#     browser.get('https://www.taobao.com')
#     time.sleep(5)
#
#     # Switch back to the first tab (same new spelling as above).
#     browser.switch_to.window(browser.window_handles[0])
#     browser.get('https://www.sina.com.cn')
#
#     time.sleep(10)
# finally:
#     # Two tabs are open here: quit() closes both and ends the session,
#     # close() would only close the current one.
#     browser.quit()


# ---------------------------------------------------------------------------
# ActionChains
# ---------------------------------------------------------------------------
# from selenium import webdriver
# from selenium.webdriver import ActionChains
# from selenium.webdriver.common.by import By
# import time
#
# driver = webdriver.Chrome()
# driver.implicitly_wait(10)
# driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
#
# try:
#
#     # Old spelling: driver.switch_to_frame('iframeResult')
#     # Switch into the frame whose id is iframeResult.
#     driver.switch_to.frame('iframeResult')
#
#     # Source element.
#     draggable = driver.find_element(By.ID, 'draggable')
#
#     # Target element.
#     droppable = driver.find_element(By.ID, 'droppable')
#
#     # ActionChains must be given the driver object; it returns an action
#     # chain object, assigned to a variable here.
#     actions = ActionChains(driver)
#
#     # Approach 1: robot-like.
#     # Moves the source onto the target in one instant jump.
#     # actions.drag_and_drop(draggable, droppable)  # queue the action
#     # actions.perform()  # run the queued action
#
#
#     # Approach 2: imitate a human.
#     source = draggable.location['x']
#     target = droppable.location['x']
#     print(source, target)
#
#     distance = target - source
#     print(distance)
#
#     # perform: every action has to be run with perform().
#
#     # Press and hold the source element.
#     ActionChains(driver).click_and_hold(draggable).perform()
#
#     s = 0
#     while s < distance:
#         # Move 2 px to the right per step.
#         ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
#         s += 2
#
#     # Release the mouse button.
#     ActionChains(driver).release().perform()
#
#     time.sleep(10)
#
#
# finally:
#     driver.quit()


# ---------------------------------------------------------------------------
# Forward / back
# ---------------------------------------------------------------------------
# from selenium import webdriver
# import time
#
# driver = webdriver.Chrome()
#
# try:
#     driver.implicitly_wait(10)
#     driver.get('https://www.jd.com/')
#     driver.get('https://www.baidu.com/')
#     driver.get('https://www.cnblogs.com/')
#
#     time.sleep(2)
#
#     # Go back.
#     driver.back()
#     time.sleep(1)
#     # Go forward.
#     driver.forward()
#     time.sleep(1)
#     driver.back()
#     time.sleep(10)
#
# finally:
#     driver.quit()
3.破解登录:
import time

from selenium import webdriver
from selenium.webdriver import ChromeOptions

# Log in to Baidu by reusing a Chrome user profile and a session cookie.
#
# Steps:
#   1. In the file explorer, enable "show hidden files".
#   2. Go to C:\Users\administortra\AppData\Local\Google\Chrome\User Data
#      and delete the Default folder.
#   3. Reopen the browser and log in to the Baidu account
#      - this creates a new Default profile folder.
#   4. Add the cookies.
#   5. Close Chrome, then run this program.

# Options object that carries the browser start-up arguments.
options = ChromeOptions()

# Directory where the cookies are stored:
# 'C:\Users\administortra\AppData\Local\Google\Chrome\User Data'
profile_directory = r'--user-data-dir=C:\Users\administortra\AppData\Local\Google\Chrome\User Data'

# Point Chrome at that user-data directory.
options.add_argument(profile_directory)

# Hand the options to the driver. `options=` replaces the deprecated
# `chrome_options=` keyword.
driver = webdriver.Chrome(options=options)

try:
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')

    # Cookie to copy from the logged-in browser:
    #   BDUSS:*****

    # Add the user's cookie.
    # The keys "name" and "value" must be lowercase.
    driver.add_cookie({"name": "BDUSS", "value": "用户session字符串"})

    # Reload the page so the cookie takes effect.
    driver.refresh()

    time.sleep(10)

finally:
    # quit() ends the WebDriver session; close() only closes the window.
    driver.quit()
4.selenium爬取京东商品信息:
# ---------------------------------------------------------------------------
# Earlier single-page variant, kept commented out for reference.
# It also extracts the seller and finds the pager by its link text.
# ---------------------------------------------------------------------------
# Scrape product information from JD:
#     Request url:
#         https://www.jd.com/
#     Extracted fields:
#         1. product detail page
#         2. product name
#         3. product price
#         4. number of reviews
#         5. seller
#
# from selenium import webdriver
# from selenium.webdriver.common.keys import Keys
# import time
#
# driver = webdriver.Chrome()
#
# try:
#     driver.implicitly_wait(10)
#     # 1. Send a request to the JD home page.
#     driver.get('https://www.jd.com/')
#
#     # 2. Type the product name and press Enter to search.
#     input_tag = driver.find_element_by_id('key')
#     input_tag.send_keys('macbook')
#     input_tag.send_keys(Keys.ENTER)
#     time.sleep(2)
#
#     # Scroll with JS so that all products get loaded.
#     js_code = '''
#         window.scrollTo(0,5000);
#     '''
#     driver.execute_script(js_code)  # run the js code
#
#     # Wait for the data to load.
#     time.sleep(2)
#
#     # 3. Find all product divs.
#     # good_div = driver.find_element_by_id('J_goodsList')
#     good_list = driver.find_elements_by_class_name('gl-item')
#     n = 1
#     for good in good_list:
#         # Look up by selector.
#         # Product link.
#         good_url = good.find_element_by_css_selector(
#             '.p-img a').get_attribute('href')
#
#         # Product name.
#         good_name = good.find_element_by_css_selector(
#             '.p-name em').text.replace("\n", "--")
#
#         # Product price.
#         good_price = good.find_element_by_class_name(
#             'p-price').text.replace("\n", ":")
#
#         # Number of reviews.
#         good_commit = good.find_element_by_class_name(
#             'p-commit').text.replace("\n", " ")
#
#         # Seller.
#         good_from = good.find_element_by_class_name(
#             'J_im_icon').text.replace("\n", " ")
#
#         good_content = f'''
#         商品链接: {good_url}
#         商品名称: {good_name}
#         商品价格: {good_price}
#         评价人数: {good_commit}
#         商品商家: {good_from}
#         \n
#         '''
#         print(good_content)
#         with open('jd.txt', 'a', encoding='utf-8') as f:
#             f.write(good_content)
#
#     next_tag = driver.find_element_by_link_text('下一页')
#
#     next_tag.click()
#
#     time.sleep(10)
#
#
# finally:
#     driver.close()


"""Scrape product information from JD.

Request url:
    https://www.jd.com/

Extracted fields:
    1. product detail page
    2. product name
    3. product price
    4. number of reviews
    5. seller (only extracted by the earlier variant kept in comments above)
"""
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def _scrape_page(driver, out):
    """Write every product on the current result page to the file *out*."""
    # Scroll with JS so that the lazily loaded products are rendered too.
    js_code = '''
        window.scrollTo(0,5000);
    '''
    driver.execute_script(js_code)

    # Wait for the data to load.
    time.sleep(2)

    # 3. Find all product nodes.
    # good_div = driver.find_element(By.ID, 'J_goodsList')
    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')
    for good in good_list:
        # Product link.
        good_url = good.find_element(
            By.CSS_SELECTOR, '.p-img a').get_attribute('href')

        # Product name.
        good_name = good.find_element(
            By.CSS_SELECTOR, '.p-name em').text.replace("\n", "--")

        # Product price.
        good_price = good.find_element(
            By.CLASS_NAME, 'p-price').text.replace("\n", ":")

        # Number of reviews.
        good_commit = good.find_element(
            By.CLASS_NAME, 'p-commit').text.replace("\n", " ")

        good_content = f'''
        商品链接: {good_url}
        商品名称: {good_name}
        商品价格: {good_price}
        评价人数: {good_commit}
        \n
        '''
        print(good_content)
        out.write(good_content)


def _go_to_next_page(driver):
    """Click the "next page" button; return False when there is none."""
    try:
        next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
    except NoSuchElementException:
        return False

    # NOTE(review): assumes JD marks the button on the last page with an
    # extra "disabled" class - confirm against the live page.
    if 'disabled' in (next_tag.get_attribute('class') or ''):
        return False

    next_tag.click()
    time.sleep(2)
    return True


def get_good(driver, max_pages=None):
    """Scrape the search result pages shown by *driver* into ``jd.txt``.

    The driver must already display a JD search result page. Each page is
    scrolled, every product is printed and appended to ``jd.txt``, then the
    "next page" button is followed until it is missing or *max_pages* pages
    have been processed.

    Pages are walked in a loop rather than by recursion. The recursive form
    closed the driver once per recursion level while unwinding and could
    only ever finish by raising.

    Args:
        driver: a Selenium WebDriver positioned on a result page.
        max_pages: optional upper bound on the number of pages to scrape;
            ``None`` (the default) means "until there is no next page".

    The driver session is always ended before returning, also on errors.
    """
    pages_done = 0
    try:
        # Open the output file once instead of once per product.
        with open('jd.txt', 'a', encoding='utf-8') as out:
            while max_pages is None or pages_done < max_pages:
                _scrape_page(driver, out)
                pages_done += 1
                if not _go_to_next_page(driver):
                    break
    finally:
        # quit() ends the WebDriver session; close() only closes the window.
        driver.quit()


if __name__ == '__main__':

    good_name = input('请输入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)
        # 1. Send a request to the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name and press Enter to search.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except Exception:
        # get_good() owns the shutdown only once it has been reached.
        driver.quit()
        raise

    get_good(driver)
5.破解极验滑动验证:
"""Crack the Geetest slider captcha.

cnblogs login url:
    https://account.cnblogs.com/signin?returnUrl=https%3A%2F%2Fwww.cnblogs.com%2F

Steps:
    1. Enter user name and password, then click login.
    2. When the slider captcha pops up, capture the complete picture and the
       picture with the gap.
    3. Compare the two pictures pixel by pixel to get the sliding distance.
    4. Build a movement track that imitates a human.
    5. Slide.
"""
import random
import time

from PIL import Image  # pip3 install pillow
from selenium import webdriver  # drives the browser
from selenium.webdriver import ActionChains  # used to drag the slider
from selenium.webdriver.common.by import By

option = webdriver.ChromeOptions()
option.add_argument('disable-infobars')

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=option)


def get_snap(driver):
    """Screenshot the page and return the captcha canvas cropped out of it.

    The full-page screenshot is written to ``snap.png`` (overwritten on every
    call); the crop box comes from the location and size of the element with
    class ``geetest_canvas_img``.

    Returns:
        A PIL image holding only the captcha picture.
    """
    # Selenium's built-in screenshot of the page.
    driver.save_screenshot('snap.png')

    canvas = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')

    left = canvas.location['x']
    upper = canvas.location['y']
    right = left + canvas.size['width']
    lower = upper + canvas.size['height']

    # NOTE(review): assumes screenshot pixels map 1:1 to the element
    # coordinates (no display scaling) - confirm on the target machine.
    # crop() loads the pixel data, so the file can be closed right after.
    with Image.open('snap.png') as page:
        return page.crop((left, upper, right, lower))


def _capture_with_fullbg(driver, display):
    """Set the full-background canvas to *display*, wait, then screenshot."""
    time.sleep(0.2)

    js_code = (
        "document.getElementsByClassName('geetest_canvas_fullbg')[0]"
        ".style.display = arguments[0];"
    )
    driver.execute_script(js_code, display)

    # Give the page time to repaint before taking the screenshot.
    time.sleep(1)

    return get_snap(driver)


def get_image1(driver):
    """Return the captcha picture with the full background shown.

    The wait now happens after the canvas is toggled, as in get_image2;
    previously the screenshot was taken immediately after the toggle.
    """
    return _capture_with_fullbg(driver, 'block')


def get_image2(driver):
    """Return the captcha picture with the full background hidden (gap visible)."""
    return _capture_with_fullbg(driver, 'none')


def get_distance(image1, image2, start=60, color_num=60, offset=7):
    """Find how far the slider has to travel by comparing the two pictures.

    Columns are scanned left to right beginning at *start*. The first pixel
    whose red, green or blue channel differs by *color_num* or more between
    the two images marks the gap.

    Args:
        image1: picture without the gap.
        image2: picture with the gap; same size as *image1*.
        start: first x coordinate to inspect.
        color_num: per-channel difference treated as "different".
        offset: value subtracted from the x coordinate that was found.
            NOTE(review): looks like an empirical correction - confirm.

    Returns:
        ``x - offset`` for the first differing column, or ``None`` when the
        pictures do not differ anywhere in the scanned area.
    """
    # Load the pixel access objects once instead of once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()
    width, height = image1.size

    for x in range(start, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            if any(abs(c1 - c2) >= color_num
                   for c1, c2 in zip(rgb1[:3], rgb2[:3])):
                return x - offset

    return None


def get_stacks(distance):
    """Build a human-like track for moving the slider by *distance* pixels.

    The forward track deliberately overshoots the target by 20 px and the
    fixed backward track (which sums to -20) brings the slider back, so both
    tracks together add up to exactly *distance*.

    Uniformly accelerated / decelerated motion is used:
        v = v0 + a * t
        s = v * t + 0.5 * a * (t ** 2)

    Args:
        distance: number of pixels (an int) the slider must travel.

    Returns:
        ``{'forward_stacks': [...], 'back_stacks': [...]}`` - lists of
        integer x offsets to apply one after the other.
    """
    distance += 20

    # Initial speed.
    v0 = 0

    # Magnitudes to pick the acceleration / deceleration from.
    a_list = [3, 4, 5]

    # Time per step.
    t = 0.2

    # Distance covered so far.
    s = 0

    # Forward track.
    forward_stacks = []

    # Accelerate over the first 3/5 of the way, decelerate afterwards.
    mid = distance * 3 / 5

    while s < distance:
        if s < mid:
            a = random.choice(a_list)
        else:
            a = -random.choice(a_list)

        v = v0

        # Displacement of this step, rounded to whole pixels.
        stack = round(v * t + 0.5 * a * (t ** 2))

        # Never step past the target, otherwise the slider ends up to the
        # right of the gap by whatever the last step overshot.
        stack = min(stack, distance - s)

        s += stack

        v0 = v + a * t

        forward_stacks.append(stack)

    back_stacks = [-1, -1, -2, -3, -2, -3, -2, -2, -3, -1]

    return {'forward_stacks': forward_stacks, 'back_stacks': back_stacks}


def _drag_slider(driver, slider_button, stacks):
    """Hold *slider_button* and move it along the tracks in *stacks*."""
    ActionChains(driver).click_and_hold(slider_button).perform()

    time.sleep(0.2)
    for forward_stack in stacks['forward_stacks']:
        ActionChains(driver).move_by_offset(xoffset=forward_stack, yoffset=0).perform()
        time.sleep(0.1)
    for back_stack in stacks['back_stacks']:
        ActionChains(driver).move_by_offset(xoffset=back_stack, yoffset=0).perform()
        time.sleep(0.1)

    time.sleep(0.2)

    # Small wiggle before letting go.
    ActionChains(driver).move_by_offset(xoffset=5, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=-5, yoffset=0).perform()

    ActionChains(driver).release().perform()


def main():
    """Log in to cnblogs and solve the slider captcha with the global driver.

    Raises:
        RuntimeError: when the two captcha pictures show no difference, so
            no sliding distance can be derived.
    """
    try:
        # The implicit wait applies to every later element lookup.
        driver.implicitly_wait(5)
        driver.get('https://passport.cnblogs.com/user/signin')

        # 1. Enter user name and password, then click login.
        # NOTE(review): credentials are hard-coded; move them out of the
        # source before sharing this script.
        username = driver.find_element(By.ID, 'LoginName')
        password = driver.find_element(By.ID, 'Password')
        login_button = driver.find_element(By.CLASS_NAME, 'ladda-label')
        time.sleep(1)
        username.send_keys('_tank_')
        time.sleep(1)
        password.send_keys('k46709394.')

        # Wait until the credentials are fully typed before clicking login,
        # otherwise the captcha dialog does not pop up.
        time.sleep(1)
        login_button.click()
        # time.sleep(3)

        # 2. Click the slider button so the captcha pictures are shown.
        geetest_button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
        geetest_button.click()

        time.sleep(0.2)

        # 3. Capture the complete picture.
        image1 = get_image1(driver)

        # 4. Capture the picture with the gap.
        image2 = get_image2(driver)

        # 5. Compare the two pictures to get the sliding distance.
        distance = get_distance(image1, image2)
        if distance is None:
            raise RuntimeError('no gap found: the two captcha pictures are identical')

        # 6. Build a human-like track.
        stacks = get_stacks(distance)

        # 7. Slide along the track.
        slider_button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
        time.sleep(0.2)

        _drag_slider(driver, slider_button, stacks)

        time.sleep(50)

    finally:
        # quit() ends the WebDriver session; close() only closes the window.
        driver.quit()


if __name__ == '__main__':
    main()