|
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
-
- from pydriller import RepositoryMining
- from pydriller import GitRepository
- from pydriller.metrics.process.change_set import ChangeSet
- from pydriller.metrics.process.code_churn import CodeChurn
- from pydriller.metrics.process.commits_count import CommitsCount
- from pydriller.metrics.process.contributors_count import ContributorsCount
- from pydriller.metrics.process.contributors_experience import ContributorsExperience
- from pydriller.metrics.process.hunks_count import HunksCount
- from pydriller.metrics.process.lines_count import LinesCount
- from datetime import datetime
- import psycopg2
- import time
- import calendar
- from collections import defaultdict
-
- # from matplotlib import pyplot as plt
- # import pandas as pd #用于生成满足绘图要求的数据格式
- # import statsmodels.api as sm #用于局部加权回归
-
-
def plot(x, y):
    """Draw a murder-vs-burglary scatter plot with a LOWESS curve and save it as PNG.

    The parameters ``x`` and ``y`` are accepted but never read; the plotted
    data comes from a ``crime`` table instead.

    NOTE(review): this function uses the names ``plt``, ``sm`` and ``crime``.
    The only matplotlib / statsmodels imports visible in this file are
    commented out and ``crime`` is not defined here, so calling the function
    as-is presumably raises ``NameError`` -- confirm where these names are
    meant to come from before using it.
    """
    # ********* Begin *********#
    fig, ax = plt.subplots()  # subplots returns the figure and its axes
    # Leave out the 'District of Columbia' and 'United States' rows.
    crime2 = crime[~crime['state'].isin(['District of Columbia', 'United States'])]
    ax.set_xlim(0, 10)  # x axis runs from 0 to 10
    ax.set_ylim(0, 1200)  # y axis runs from 0 to 1200
    ax.set_xlabel("crime murder", fontsize=12)  # x axis label
    ax.set_ylabel("crime burglary", fontsize=12)  # y axis label
    # Locally weighted regression of burglary on murder.
    lowess = sm.nonparametric.lowess(crime2["burglary"], crime2["murder"])

    ax.plot(lowess[:, 0], lowess[:, 1])  # fitted curve
    ax.plot(crime2["murder"], crime2["burglary"], "*", color="#00CC88")  # scatter points

    # NOTE(review): assigned after the figure already exists, so this
    # presumably only affects figures created later -- confirm the intent.
    plt.rcParams['figure.figsize'] = (8.0, 4.0)
    # ********* End *********#

    # Save before showing: on interactive backends the figure may be gone once
    # show() returns, and saving afterwards would then write an empty image.
    plt.savefig('matplotlibScatter/studentanswer/level_2/crime.png')  # save as PNG
    plt.show()  # display the figure
    plt.close()  # close the figure window
-
-
-
-
def others_fun():
    """Compute several PyDriller process metrics for the analysis window.

    Reads the module-level ``path_to_repo``, ``dt1`` and ``dt2``.  Every
    result is only stored in a local variable: nothing is printed or returned.
    """
    window = {'since': dt1, 'to': dt2}

    # Change set: maximum and average number of files committed together.
    change_set = ChangeSet(path_to_repo, **window)
    maximum = change_set.max()
    average = change_set.avg()

    # Commits count: number of commits for each modified file in the period.
    commits_per_file = CommitsCount(path_to_repo, **window).count()

    # Contributors count: developers per modified file, and the "minor" ones
    # (those who contributed less than 5% to the file).
    contributors = ContributorsCount(path_to_repo, **window)
    count = contributors.count()
    minor = contributors.count_minor()

    # Contributors experience: percentage of lines authored by the highest
    # contributor of each modified file.
    owner_share = ContributorsExperience(path_to_repo, **window).count()

    # Hunks count: a hunk is a continuous block of changes in a diff, so the
    # median number of hunks per file tells how fragmented the changes are.
    hunks_per_file = HunksCount(path_to_repo, **window).count()

    # Code churn: sum of (added lines - removed lines) per file.
    churn = CodeChurn(path_to_repo, **window)
    files_count = churn.count()
    files_max = churn.max()
    files_avg = churn.avg()

    # Second count() on the same churn metric, reduced to
    # (number of files with a non-zero value, sum of those values).
    total_count = churn.count()
    file_num, line_num = add_values(total_count)
-
def add_values(mapdata):
    """Summarise the non-zero values of a mapping.

    Args:
        mapdata: mapping whose values are numbers.

    Returns:
        A ``(key_num, val_num)`` tuple: how many entries have a value other
        than zero, and the sum of those values.
    """
    nonzero = [amount for amount in mapdata.values() if amount != 0]
    return len(nonzero), sum(nonzero)
-
-
# Connect to the database.
# NOTE: this runs as soon as the module is loaded, with hard-coded
# credentials; the helper functions below all share `conn` and `cur`.
conn = psycopg2.connect(
    database="gitea",
    user="gitea",
    password="gitea",
    host="127.0.0.1",
    port="5432",
)
print("Opened database successfully")

# Single cursor reused by every helper below.
cur = conn.cursor()
-
def create_tabel():
    """Create the ``COMPANY`` table, commit, and close the shared connection.

    Uses the module-level ``cur`` and ``conn``; because ``conn`` is closed at
    the end, it cannot be used again afterwards.
    """
    ddl = '''CREATE TABLE COMPANY
       (ID INT PRIMARY KEY NOT NULL,
       NAME TEXT NOT NULL,
       AGE INT NOT NULL,
       ADDRESS CHAR(50),
       SALARY REAL);'''
    cur.execute(ddl)
    print("Table created successfully")

    conn.commit()
    conn.close()
-
def insert():
    """Insert four sample rows into ``COMPANY``, commit, and close the connection.

    Uses the module-level ``cur`` and ``conn``; because ``conn`` is closed at
    the end, it cannot be used again afterwards.
    """
    statements = (
        "INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) \
      VALUES (1, 'Paul', 32, 'California', 20000.00 )",
        "INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) \
      VALUES (2, 'Allen', 25, 'Texas', 15000.00 )",
        "INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) \
      VALUES (3, 'Teddy', 23, 'Norway', 20000.00 )",
        "INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) \
      VALUES (4, 'Mark', 25, 'Rich-Mond ', 65000.00 )",
    )
    # Executed one by one, in the order listed above.
    for statement in statements:
        cur.execute(statement)

    conn.commit()
    print("Records created successfully")
    conn.close()
-
def select():
    """Print id, name, address and salary of every ``COMPANY`` row, then close.

    Uses the module-level ``cur`` and ``conn``; because ``conn`` is closed at
    the end, it cannot be used again afterwards.
    """
    cur.execute("SELECT id, name, address, salary from COMPANY")
    for record in cur.fetchall():
        # Columns come back in the order listed in the SELECT.
        company_id, name, address, salary = record[0], record[1], record[2], record[3]
        print("ID = ", company_id)
        print("NAME = ", name)
        print("ADDRESS = ", address)
        print("SALARY = ", salary, "\n")

    print("Operation done successfully")
    conn.close()
-
-
def update():
    """Set the salary of the row with ID 1 to 25000.00, list all rows, then close.

    Uses the module-level ``cur`` and ``conn``.  The change is committed
    before the table is printed; because ``conn`` is closed at the end, it
    cannot be used again afterwards.
    """
    cur.execute("UPDATE COMPANY set SALARY = 25000.00 where ID=1")
    # Must be *called*: a bare `conn.commit` only references the method, so
    # the UPDATE would be discarded when the connection is closed below.
    conn.commit()
    print("Total number of rows updated :", cur.rowcount)

    # Show the table content after the update.
    cur.execute("SELECT id, name, address, salary from COMPANY")
    rows = cur.fetchall()
    for row in rows:
        print("ID = ", row[0])
        print("NAME = ", row[1])
        print("ADDRESS = ", row[2])
        print("SALARY = ", row[3], "\n")

    print("Operation done successfully")
    conn.close()
-
def delete():
    """Delete the row with ID 2 from ``COMPANY``, list the remaining rows, then close.

    Uses the module-level ``cur`` and ``conn``.  The change is committed
    before the table is printed; because ``conn`` is closed at the end, it
    cannot be used again afterwards.
    """
    cur.execute("DELETE from COMPANY where ID=2;")
    # Must be *called*: a bare `conn.commit` only references the method, so
    # the DELETE would be discarded when the connection is closed below.
    conn.commit()
    print("Total number of rows deleted :", cur.rowcount)

    # Show the table content after the deletion.
    cur.execute("SELECT id, name, address, salary from COMPANY")
    rows = cur.fetchall()
    for row in rows:
        print("ID = ", row[0])
        print("NAME = ", row[1])
        print("ADDRESS = ", row[2])
        print("SALARY = ", row[3], "\n")

    print("Operation done successfully")
    conn.close()
-
-
- #def delete_tabel():
-
-
if __name__ == '__main__':
    # Script entry point: collect per-author and per-file commit statistics
    # for the period [dt1, dt2] of the repository at `path_to_repo` and print
    # a textual summary.

    # ---- analysis window ------------------------------------------------
    timestamp_fmt = "%Y-%m-%d %H:%M:%S"

    ticks = time.time()
    localtime = time.localtime(time.time())

    # Same instant, 24 hours earlier.
    change_time = ticks - 60*60*24
    change_local = time.localtime(change_time)

    time_str = time.strftime(timestamp_fmt, time.localtime())
    print('now time: {}'.format(time_str))
    time_unix = time.mktime(time.strptime(time_str, timestamp_fmt))

    change_str = time.strftime(timestamp_fmt, time.localtime(change_time))
    print('before time: {}'.format(change_str))

    # dt1 keeps only year/month/day of "24 hours ago" (i.e. midnight of that
    # date); dt2 is the current local time down to the second.
    dt1 = datetime(*change_local[:3])
    dt2 = datetime(*localtime[:6])

    # ---- configuration --------------------------------------------------
    from_commit = 'from commit hash'
    to_commit = 'to commit hash'
    from_tag = ''
    to_tag = ''
    path_to_repo = '/home/pcl/guomy/code/Paddle'

    # ---- per-commit analysis ---------------------------------------------
    rm = RepositoryMining(
        path_to_repo,
        since=dt1,
        to=dt2,
        only_no_merge=True,
    ).traverse_commits()

    # Per author: number of commits / modified / added / deleted lines.
    commit_author, commit_author_mod = {}, {}
    commit_author_add, commit_author_del = {}, {}
    commit_master = 0
    commit_other = 0

    # Per branch name: number of commits.
    commit_branchs = {}

    # Per file name: number of commits / modified / added / deleted lines.
    commit_file, commit_file_mod = {}, {}
    commit_file_add, commit_file_del = {}, {}

    # Per author, then per file name.
    commit_author_file = defaultdict(defaultdict)
    commit_author_file_mod = defaultdict(defaultdict)
    commit_author_file_add = defaultdict(defaultdict)
    commit_author_file_del = defaultdict(defaultdict)

    # Per file name, then per author (declared but not filled in below).
    commit_file_author = defaultdict(defaultdict)
    commit_file_author_mod = defaultdict(defaultdict)
    commit_file_author_add = defaultdict(defaultdict)
    commit_file_author_del = defaultdict(defaultdict)

    authors = ['zhiqiu', 'Chen Weihang', 'Aurelius84']

    for commit in rm:
        name = commit.author.name
        if name not in commit_author:
            # First commit seen for this author: start every counter at zero.
            commit_author[name] = 0
            commit_author_add[name] = 0
            commit_author_del[name] = 0
            commit_author_mod[name] = 0
        commit_author[name] += 1

        branches = commit.branches
        for br in branches:
            commit_branchs[br] = commit_branchs.get(br, 0) + 1

        if 'master' in branches:
            commit_master += 1
        else:
            commit_other += 1

        summary = [
            'author_date: {}'.format(commit.author_date),
            '|Hash: {}'.format(commit.hash),
            '|author: {}'.format(commit.author.name),
            'branches: {}'.format(commit.branches),
            'in_main_branch: {}'.format(commit.in_main_branch),
            'merge: {}'.format(commit.merge),
        ]
        print(*summary)
        print("-------------------------------------------------------------")

        # One entry per file touched by this commit.
        for m in commit.modifications:
            filename = m.filename
            added = m.added
            removed = m.removed
            changed = added + removed

            commit_author_add[name] += added
            commit_author_del[name] += removed
            commit_author_mod[name] += changed

            # Author -> file tallies.
            for table, amount in (
                (commit_author_file, 1),
                (commit_author_file_mod, changed),
                (commit_author_file_add, added),
                (commit_author_file_del, removed),
            ):
                per_file = table[name]
                per_file[filename] = per_file.get(filename, 0) + amount

            # File tallies across all authors.
            for table, amount in (
                (commit_file, 1),
                (commit_file_add, added),
                (commit_file_del, removed),
                (commit_file_mod, changed),
            ):
                table[filename] = table.get(filename, 0) + amount

    # ---- rankings (largest first) -----------------------------------------
    commit_author_order = sorted(commit_author.items(), key=lambda pair: pair[1], reverse=True)
    commit_author_add_order = sorted(commit_author_add.items(), key=lambda pair: pair[1], reverse=True)
    commit_author_del_order = sorted(commit_author_del.items(), key=lambda pair: pair[1], reverse=True)

    commit_file_order = sorted(commit_file.items(), key=lambda pair: pair[1], reverse=True)
    commit_file_add_order = sorted(commit_file_add.items(), key=lambda pair: pair[1], reverse=True)
    commit_file_del_order = sorted(commit_file_del.items(), key=lambda pair: pair[1], reverse=True)

    # Top 5 files by number of commits.
    for f, v in commit_file_order[:5]:
        print('file: {},commit: {},modify: {},add: {},del: {} '.format(
            f, v, commit_file_mod[f], commit_file_add[f], commit_file_del[f]))

    # Top 15 authors by number of commits.
    for f, v in commit_author_order[:15]:
        print('author: {},commit: {},modify: {},add: {},del: {} '.format(
            f, v, commit_author_mod[f], commit_author_add[f], commit_author_del[f]))

    # Figures for the hand-picked authors.
    for author in authors:
        if author not in commit_author:
            print('author {} not commit among the time'.format(author))
            continue
        print('author: {},commit: {},modify: {},add: {},del: {} '.format(
            author, commit_author[author], commit_author_mod[author],
            commit_author_add[author], commit_author_del[author]))

    print("*****************************end*****************************")

    # ---- repository-wide numbers -------------------------------------------
    gr = GitRepository(path_to_repo)
    n_cm = gr.total_commits()

    # Files present in the repo at the current commit, de-duplicated as keys.
    file_map = dict.fromkeys(gr.files(), 1)

    print('The num of files current in the repo is {}'.format(len(file_map)) )
    print('The total number of commits is {}'.format(n_cm) )

    # ---- lines added / removed per file ------------------------------------
    metric = LinesCount(path_to_repo, since=dt1, to=dt2)

    added_count = metric.count_added()
    added_max = metric.max_added()
    added_avg = metric.avg_added()

    removed_count = metric.count_removed()
    removed_max = metric.max_removed()
    removed_avg = metric.avg_removed()

    # Files with at least one added or removed line, plus the line totals.
    mod_file = {}
    mod_add = 0
    mod_removed = 0

    for key, value in added_count.items():
        if value != 0:
            mod_add += value
            mod_file[key] = 1

    for key, value in removed_count.items():
        if value != 0:
            mod_removed += value
            mod_file[key] = 1

    # Same totals, derived from the per-author tallies of the commit loop.
    _, m_add = add_values(commit_author_add)
    _, m_del = add_values(commit_author_del)

    # "<n> commits to <branch>" for every branch except master.
    branch_info = ','.join(
        str(v) + ' commits to ' + k
        for k, v in commit_branchs.items()
        if k != 'master'
    )

    print('Excluding merges, {} authors have pushed {} commits to master and {} commits to all branches, including {}.'.format(
        len(commit_author),
        commit_master,
        commit_other,
        branch_info))

    print('On all branchs including master, {} files have changed and there have been {},{} additions and {},{} deletions.'.format(
        len(mod_file),
        mod_add, m_add, mod_removed, m_del))
-
-
-
-
-
-
-
|