|
- #4.3
def parse_page(url, timeout=10):
    """Scrape one forecast page and record each city's minimum temperature.

    Downloads ``url``, locates the ``div`` with class ``conMidtab``, walks
    every ``table`` inside it and appends one record per data row to the
    module-level ``ALL_DATA`` list. Each record is a dict with the keys
    ``'城市'`` (city name) and ``'最低气温'`` (minimum temperature as ``int``).

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page does not contain the ``conMidtab`` container.
    """
    # Request headers: present the request as coming from a desktop browser.
    headers = {
        'User-Agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64) '
                      'AppleWebkit/537.36 (KHTML, like Geoko) Chrome/70.0.3538.102 safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')

    # html5lib is used instead of lxml because it is more tolerant of
    # malformed markup.
    soup = BeautifulSoup(text, 'html5lib')
    container = soup.find('div', class_='conMidtab')
    if container is None:
        raise ValueError("no <div class='conMidtab'> found in %s" % url)

    for table in container.find_all('table'):
        # The first two rows are skipped (presumably header rows -- confirm
        # against the page markup).
        rows = table.find_all('tr')[2:]
        for index, row in enumerate(rows):
            cells = row.find_all('td')
            # In the first data row of a table the city name is taken from
            # the second cell (presumably because a leading cell holds the
            # province name -- confirm against the page markup).
            city_cell = cells[1] if index == 0 else cells[0]
            # stripped_strings is a generator; materialise it to index it.
            city = list(city_cell.stripped_strings)[0]
            # The minimum temperature sits in the second-to-last cell.
            min_temp = list(cells[-2].stripped_strings)[0]
            ALL_DATA.append({'城市': city, '最低气温': int(min_temp)})
-
-
- #4.4
-
def main():
    """Scrape every preset page and return the ten coldest records.

    Calls ``parse_page`` for each preset URL (which fills ``ALL_DATA``),
    sorts ``ALL_DATA`` in place by ascending ``'最低气温'`` and returns its
    first ten entries.
    """
    # Preset pages to request.
    pages = [
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    ]
    for page in pages:
        parse_page(page)

    # Order the collected records by minimum temperature, lowest first.
    def min_temperature(record):
        return record['最低气温']

    ALL_DATA.sort(key=min_temperature)
    # Keep only the ten lowest minimum temperatures and their cities.
    return ALL_DATA[:10]
-
- #4.5
if __name__ == '__main__':
    # Split the ranked records into parallel label / value lists.
    datas = main()
    city = [data['城市'] for data in datas]
    temp = [data['最低气温'] for data in datas]
    # Draw one bar per city, labelled with the city name, then show it.
    plt.bar(range(len(city)), temp, tick_label=city)
    plt.show()
-
- #4.6
def main(csv_path='/home/ubuntu/china_city_AQI.csv',
         top10_path='/home/ubuntu/top10_aqi.csv',
         bottom10_path='/home/ubuntu/bottom10_aqi.csv'):
    """Summarise a city AQI table and export the ten best and worst cities.

    Reads the CSV at ``csv_path``, prints its structure, a preview and basic
    statistics of the ``AQI`` column, then writes the ten rows with the
    lowest AQI to ``top10_path`` and the ten rows with the highest AQI to
    ``bottom10_path`` (both without the index column).

    Args:
        csv_path: CSV file to load; it must contain an ``AQI`` column.
        top10_path: Destination CSV for the ten lowest-AQI rows.
        bottom10_path: Destination CSV for the ten highest-AQI rows.
    """
    aqi_data = pd.read_csv(csv_path)

    print('基本信息:')
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    aqi_data.info()

    print('数据预览: ')
    print(aqi_data.head())

    # Basic statistics of the AQI column.
    aqi = aqi_data['AQI']
    print("AQI最大值:", aqi.max())
    print("AQI最小值:", aqi.min())
    print("AQI均值:", aqi.mean())

    # Ten rows with the lowest AQI.
    top10_cities = aqi_data.sort_values(by=['AQI']).head(10)
    print('空气质量最好的10个城市:')
    print(top10_cities)

    # Ten rows with the highest AQI.
    bottom10_cities = aqi_data.sort_values(by=['AQI'], ascending=False).head(10)
    print('空气质量最差的10个城市:')
    print(bottom10_cities)

    # Persist both rankings as CSV files.
    top10_cities.to_csv(top10_path, index=False)
    bottom10_cities.to_csv(bottom10_path, index=False)
-
- #6.1
# Work around garbled Chinese text: load a font file by explicit path.
zhfont = mpl.font_manager.FontProperties(
    fname='/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',
)
-
- #6.3
def main(csv_path='/home/ubuntu/china_city_AQI.csv',
         image_path='/home/ubuntu/top50_cities_bar.png'):
    """Plot the fifty cities with the lowest AQI as a bar chart.

    Reads the CSV at ``csv_path``, prints its structure and a preview, drops
    rows whose ``AQI`` is not positive, prints basic statistics of the
    cleaned ``AQI`` column, then draws the fifty lowest-AQI rows as a bar
    chart (``City`` on the x axis), saves it to ``image_path`` and shows it.

    Args:
        csv_path: CSV file to load; it must contain ``City`` and ``AQI``
            columns.
        image_path: Destination of the saved chart image.
    """
    aqi_data = pd.read_csv(csv_path)

    print('基本信息:')
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    aqi_data.info()

    print('数据预览: ')
    print(aqi_data.head())

    # Data cleaning: keep only rows with AQI > 0.
    clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]

    # Basic statistics -- computed on the cleaned data so that the rows
    # filtered out above cannot skew the minimum or the mean.
    clean_aqi = clean_aqi_data['AQI']
    print("AQI最大值:", clean_aqi.max())
    print("AQI最小值:", clean_aqi.min())
    print("AQI均值:", clean_aqi.mean())

    # Fifty rows with the lowest (valid) AQI.
    top50_cities = clean_aqi_data.sort_values(by=['AQI']).head(50)
    print(top50_cities)
    top50_cities.plot(kind='bar', x='City', y='AQI', title='空气质量最好的50个城市',
                      figsize=(20, 10))
    # Save before show(): the figure may be gone once the window is closed.
    plt.savefig(image_path)
    plt.show()
-
- #7.2
def main(csv_path='/home/ubuntu/china_city_AQI.csv',
         html_path='./空气质量指数最优50城市.html'):
    """Render the fifty cities with the lowest AQI as an HTML bar chart.

    Reads the CSV at ``csv_path``, prints its structure and a preview, drops
    rows whose ``AQI`` is not positive, prints basic statistics of the
    cleaned ``AQI`` column, then feeds the fifty lowest-AQI rows to ``Bar``
    and renders the chart to ``html_path``.

    ``Bar`` is presumably the pyecharts chart class imported elsewhere in the
    file -- confirm against the file's imports.

    Args:
        csv_path: CSV file to load; it must contain ``City`` and ``AQI``
            columns.
        html_path: Destination of the rendered HTML file.
    """
    aqi_data = pd.read_csv(csv_path)

    print('基本信息:')
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    aqi_data.info()

    print('数据预览: ')
    print(aqi_data.head())

    # Data cleaning: keep only rows with AQI > 0.
    clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]

    # Basic statistics -- computed on the cleaned data so that the rows
    # filtered out above cannot skew the minimum or the mean.
    clean_aqi = clean_aqi_data['AQI']
    print("AQI最大值:", clean_aqi.max())
    print("AQI最小值:", clean_aqi.min())
    print("AQI均值:", clean_aqi.mean())

    # Fifty rows with the lowest (valid) AQI.
    top50_cities = clean_aqi_data.sort_values(by=['AQI']).head(50)
    print(top50_cities)

    bar = Bar('空气质量指数最优50城市')
    bar.add(
        '',
        top50_cities['City'],
        top50_cities['AQI'],
        is_label_show=False,
        xaxis_interval=0,       # interval between x-axis labels
        xaxis_rotate=30,        # tilt the x-axis tick labels by 30 degrees
        is_datazoom_show=True,  # show the zoom control area
    )

    bar.render(html_path)
|