From 074ba34b3f3d0353222e296f22257a76af1da092 Mon Sep 17 00:00:00 2001 From: Jim Amsden Date: Tue, 23 Jun 2020 16:57:38 -0400 Subject: [PATCH 1/3] Update README.md GitHub is having trouble with fenced code blocks. Markdown editor works fine. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c00bd19..42bb304 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ source ~/.devml/bin/activate ``` #### Run Make All (installs, lints and tests) -```` +```Shell make all # #Example output @@ -85,7 +85,7 @@ dml.py 111 66 41% ---------------------------------------------- TOTAL 507 295 42% ... -```` +``` You don't use virtualenv or don't want to use it. No problem, just run `make all` it should probably work if you have python 3.6 installed. From 1cc3df5e4dc0ae8b09bb289e6d32b3104d5761e8 Mon Sep 17 00:00:00 2001 From: Jim Amsden Date: Tue, 23 Jun 2020 17:01:49 -0400 Subject: [PATCH 2/3] There was an extra fence --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 42bb304..ce58fb1 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Code is written to support Python 3.6 or greater. 
You can get that here: https make setup ``` This create a virtualenv in ~/.devml -``` + #### Next, source that virtualenv: ``` @@ -61,7 +61,7 @@ source ~/.devml/bin/activate ``` #### Run Make All (installs, lints and tests) -```Shell +``` make all # #Example output From fea7cebacb429f9e94fbe4abe71ae09e443fcc80 Mon Sep 17 00:00:00 2001 From: Jim Amsden Date: Wed, 7 Apr 2021 16:07:19 -0400 Subject: [PATCH 3/3] Improved how user information is accessed --- devml/mkdata.py | 41 ++++++++++++++-------------- devml/ts.py | 6 ++-- notebooks/ewm_data_exploration.ipynb | 2 +- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/devml/mkdata.py b/devml/mkdata.py index a9eb0ad..8bdba27 100644 --- a/devml/mkdata.py +++ b/devml/mkdata.py @@ -122,47 +122,46 @@ def create_org_df(path): os.chdir(original_cwd) return converted_df -def create_projectarea_df(ccmServer, projectArea, userId, password): +# startDate and endDate are str of the form: YYYY/MM/DD +def create_projectarea_df(ccmServer, projectArea, userId, password, startDate=None, endDate=None): """Returns a Pandas DataFrome of change sets delivered to components in a project area""" # Get all the users managed by this server, we need this to get the author_email columns = ['date', 'id', 'author_name', 'author_email', 'message', 'repo', 'commits'] - # login to EWM CCM - p = Popen(f'lscm login -r {ccmServer} -u {userId} -P {password}', shell=True, stdout=PIPE) - resp = p.stdout.read().decode('utf-8') - if not resp.startswith('Logged in'): - log.error(f'Cannot login to {ccmServer}') - return None df = pd.DataFrame(columns=columns) - p = Popen(f'lscm list users -r {ccmServer} -j', shell=True, stdout=PIPE) - ewmUsers = json.load(p.stdout) - users = {user['name']: user['mail'] for user in ewmUsers} + created_after = f'--created-after {startDate}' if startDate is not None else '' + created_before = f'--created-before {endDate}' if endDate is not None else '' # get the components in the project area - p = Popen(f'lscm 
list components -r {ccmServer} -j', shell=True, stdout=PIPE) - components = json.load(p.stdout) - components = pd.DataFrame.from_dict(components['components']) if components != None else None + p = Popen(f'scm list components -r {ccmServer} --visibility projectarea --process-area "{projectArea}" --all -u {userId} -P {password} -j', shell=True, stdout=PIPE) + components = json.load(p.stdout) # a JSON object containing a components array of component objects: name, url, uuid + components = components['components'] if components is not None else None if components is None or len(components) == 0: log.info(f'Could not get components in project area: "{projectArea}"') return df - for component in components.name: - # get all the completed change sets delivered to this component - p = Popen(f'lscm list changesets -r {ccmServer} -C "{component}" -j', shell=True, stdout=PIPE) + for component in components: + # get all the completed change sets associated with this component + p = Popen(f"scm list changesets -r {ccmServer} -C '{component['uuid']}' {created_before} {created_after} -m all -u {userId} -P {password} -j", shell=True, stdout=PIPE) changes = None try: - changes = json.load(p.stdout) + changes = json.load(p.stdout) # a JSON object with one changes keyword that is an array of change objects changes = changes['changes'] if changes != None else None except: + log.exception(f"Could not get change sets for component: {component['name']}") continue - changesDicts = [dict(list(zip(columns, [change['modified'],change['uuid'],change['author'],None,change['comment'],component,None]))) for change in changes] + changesDicts = [dict(list(zip(columns, [ + change['modified'], # changeset modification date, the last time its state was changed + change['uuid'], # changeset uuid + change['author'], # changeset author_name + change['author-details']['mail'], + change['comment'], + component['name'], + 1]))) for change in changes] df = pd.concat([df, 
pd.DataFrame.from_dict(changesDicts)]) df['date'] = pd.to_datetime(df['date']) pd.DataFrame.set_index(df, keys='date', drop=True, inplace=True) - df['author_email'] = df['author_name'].apply(lambda author_name: users[author_name] if author_name in users else None) - df['commits']=1 p = Popen(f'lscm logout -r {ccmServer}', shell=True, stdout=PIPE) return df - def get_git_uid(): """ Uniquely identify git repo: diff --git a/devml/ts.py b/devml/ts.py index 0f4f038..a624c29 100644 --- a/devml/ts.py +++ b/devml/ts.py @@ -4,8 +4,10 @@ def convert_datetime(df): """Converts Git Timestamps to Pandas Datetime""" - - df['date'] = pd.to_datetime(df['date'],format="%a %b %d %X %Y") + try: + df['date'] = pd.to_datetime(df['date'],format="%a %b %d %X %Y") + except (ValueError, TypeError): # not git's timestamp format; let pandas infer it + df['date'] = pd.to_datetime(df['date'], infer_datetime_format=True) return df def date_index(df): diff --git a/notebooks/ewm_data_exploration.ipynb b/notebooks/ewm_data_exploration.ipynb index 074d30f..67e17c1 100644 --- a/notebooks/ewm_data_exploration.ipynb +++ b/notebooks/ewm_data_exploration.ipynb @@ -326,7 +326,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.3" }, "toc": { "base_numbering": 1,