Skip to content

Commit c316477

Browse files
committed
Made requested changes
1 parent bbbf195 commit c316477

2 files changed

Lines changed: 40 additions & 180 deletions

File tree

scripts/2-process/github_process.py

Lines changed: 3 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def parse_arguments():
4444
help="Enable saving results (default: False)",
4545
)
4646
parser.add_argument(
47-
"--enable_git",
47+
"--enable-git",
4848
action="store_true",
4949
help="Enable git actions such as fetch, merge, add, commit, and push"
5050
" (default: False)",
@@ -70,43 +70,6 @@ def data_to_csv(args, data, file_path):
7070
)
7171

7272

73-
def process_totals_by_code_license(args, count_data):
74-
"""
75-
Processing count data: totals by Code License
76-
"""
77-
LOGGER.info(process_totals_by_code_license.__doc__.strip())
78-
data = {
79-
"Code License": 0,
80-
"Content License": 0,
81-
}
82-
for row in count_data.itertuples(index=False):
83-
tool = str(row.TOOL_IDENTIFIER)
84-
count = int(row.COUNT)
85-
86-
if tool == "Total public repositories":
87-
continue
88-
89-
if tool in [
90-
"MIT No Attribution",
91-
"BSD Zero Clause License",
92-
"Unlicense",
93-
]:
94-
key = "Code License"
95-
elif tool in ["CC0 1.0", "CC BY 4.0", "CC-BY-SA 4.0"]:
96-
key = "Content License"
97-
else:
98-
continue
99-
100-
data[key] += count
101-
data = pd.DataFrame(data.items(), columns=["Category", "Count"])
102-
data.sort_values("Count", ascending=False, inplace=True)
103-
data.reset_index(drop=True, inplace=True)
104-
file_path = shared.path_join(
105-
PATHS["data_phase"], "github_totals_by_code_license.csv"
106-
)
107-
data_to_csv(args, data, file_path)
108-
109-
11073
def process_totals_by_license(args, count_data):
11174
"""
11275
Processing count data: totals by License
@@ -124,7 +87,7 @@ def process_totals_by_license(args, count_data):
12487
data[tool] = count
12588

12689
data = pd.DataFrame(data.items(), columns=["License", "Count"])
127-
data.sort_values("Count", ascending=False, inplace=True)
90+
data.sort_values("License", ascending=True, inplace=True)
12891
data.reset_index(drop=True, inplace=True)
12992
file_path = shared.path_join(
13093
PATHS["data_phase"], "github_totals_by_license.csv"
@@ -158,52 +121,14 @@ def process_totals_by_restriction(args, count_data):
158121

159122
data[key] += count
160123
data = pd.DataFrame(data.items(), columns=["Category", "Count"])
161-
data.sort_values("Count", ascending=False, inplace=True)
124+
data.sort_values("Count", ascending=True, inplace=True)
162125
data.reset_index(drop=True, inplace=True)
163126
file_path = shared.path_join(
164127
PATHS["data_phase"], "github_totals_by_restriction.csv"
165128
)
166129
data_to_csv(args, data, file_path)
167130

168131

169-
def process_totals_by_rights_reserved(args, count_data):
170-
"""
171-
Processing count data: totals by Rights Reserved
172-
"""
173-
LOGGER.info(process_totals_by_rights_reserved.__doc__.strip())
174-
data = {
175-
"Rights reserved": 0,
176-
"No rights reserved": 0,
177-
}
178-
for row in count_data.itertuples(index=False):
179-
tool = str(row.TOOL_IDENTIFIER)
180-
count = int(row.COUNT)
181-
182-
if tool == "Total public repositories":
183-
continue
184-
185-
if tool in [
186-
"MIT No Attribution",
187-
"BSD Zero Clause License",
188-
"CC0 1.0",
189-
"Unlicense",
190-
]:
191-
key = "No rights reserved"
192-
elif tool in ["CC BY 4.0", "CC-BY-SA 4.0"]:
193-
key = "Rights reserved"
194-
else:
195-
continue
196-
197-
data[key] += count
198-
data = pd.DataFrame(data.items(), columns=["Category", "Count"])
199-
data.sort_values("Count", ascending=False, inplace=True)
200-
data.reset_index(drop=True, inplace=True)
201-
file_path = shared.path_join(
202-
PATHS["data_phase"], "github_totals_by_rights_reserved.csv"
203-
)
204-
data_to_csv(args, data, file_path)
205-
206-
207132
# def load_quarter_data(quarter):
208133
# """
209134
# Load data for a specific quarter.
@@ -255,9 +180,7 @@ def main():
255180
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
256181
count_data = pd.read_csv(file_count, usecols=["TOOL_IDENTIFIER", "COUNT"])
257182
process_totals_by_license(args, count_data)
258-
process_totals_by_rights_reserved(args, count_data)
259183
process_totals_by_restriction(args, count_data)
260-
process_totals_by_code_license(args, count_data)
261184

262185
# Push changes
263186
args = shared.git_add_and_commit(

scripts/3-report/github_report.py

Lines changed: 37 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# Setup
2727
LOGGER, PATHS = shared.setup(__file__)
2828
QUARTER = os.path.basename(PATHS["data_quarter"])
29-
SECTION = "Github Data"
29+
SECTION = "GitHub Data"
3030

3131

3232
def parse_arguments():
@@ -88,7 +88,7 @@ def load_data(args):
8888

8989
def github_intro(args):
9090
"""
91-
Write Github introduction.
91+
Write GitHub introduction.
9292
"""
9393
LOGGER.info(github_intro.__doc__.strip())
9494
file_path = shared.path_join(
@@ -105,25 +105,25 @@ def github_intro(args):
105105
"Overview",
106106
None,
107107
None,
108-
"Github data uses the `total_count` returned by"
108+
"GitHub data uses the `total_count` returned by"
109109
" API for search queries of the SPDX IDENTIFIER URLS"
110110
"\n"
111-
f"**The results indicate that a total of {total_repositories}"
112-
"repositories on GitHub use a mix of some rights reserved and"
113-
"no rights reserved licenses which showcases the usage of"
114-
"attribution based Creative Commons (CC) legal tool"
111+
f"**The results indicate that a total of {total_repositories} "
112+
"repositories on GitHub use a mix of some rights reserved and "
113+
"no rights reserved licenses which showcases the usage of "
114+
"attribution based Creative Commons (CC) legal tool "
115115
"and Public domain equivalent.**\n"
116116
"\n"
117117
"Thank you GitHub for providing public access to "
118118
"repository metadata through its API.",
119119
)
120120

121121

122-
def Plot_by_license_type(args):
122+
def plot_totals_by_license_type(args):
123123
"""
124-
Create plots for the languages with highest usage of latest tools
124+
Create plots showing totals by license type
125125
"""
126-
LOGGER.info(plot_totals_by_code_license.__doc__.strip())
126+
LOGGER.info(plot_totals_by_license_type.__doc__.strip())
127127
file_path = shared.path_join(
128128
PATHS["data_2-process"],
129129
"github_totals_by_license.csv",
@@ -132,7 +132,7 @@ def Plot_by_license_type(args):
132132
name_label = "License"
133133
data_label = "Count"
134134
data = pd.read_csv(file_path, index_col=name_label)
135-
data.sort_values(name_label, ascending=False, inplace=True)
135+
data.sort_values(data_label, ascending=True, inplace=True)
136136
title = "Totals by license type"
137137
plt = plot.combined_plot(
138138
args=args,
@@ -157,55 +157,20 @@ def Plot_by_license_type(args):
157157
SECTION,
158158
title,
159159
image_path,
160-
"Plots showing totals by license type.",
161-
)
162-
163-
164-
def plot_totals_by_code_license(args):
165-
"""
166-
Create plots for the languages with highest usage of latest tools
167-
"""
168-
LOGGER.info(plot_totals_by_code_license.__doc__.strip())
169-
file_path = shared.path_join(
170-
PATHS["data_2-process"],
171-
"github_totals_by_code_license.csv",
172-
)
173-
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
174-
name_label = "Category"
175-
data_label = "Count"
176-
data = pd.read_csv(file_path, index_col=name_label)
177-
data.sort_values(name_label, ascending=False, inplace=True)
178-
title = "Totals by Code License"
179-
plt = plot.combined_plot(
180-
args=args,
181-
data=data,
182-
title=title,
183-
name_label=name_label,
184-
data_label=data_label,
185-
)
186-
187-
image_path = shared.path_join(
188-
PATHS["data_phase"], "github_totals_code_license.png"
189-
)
190-
LOGGER.info(f"image file: {image_path.replace(PATHS['repo'], '.')}")
191-
192-
if args.enable_save:
193-
# Create the directory if it does not exist
194-
os.makedirs(PATHS["data_phase"], exist_ok=True)
195-
plt.savefig(image_path)
196-
197-
shared.update_readme(
198-
args,
199-
SECTION,
200-
title,
201-
image_path,
202-
"Plots showing totals by code license vs content license.",
160+
"Plots showing totals by license type. "
161+
"This shows the distribution of different licenses "
162+
"used in GitHub repositories, "
163+
"allowing Commons to evaluate how freely software on "
164+
"GitHub is being used, modified, and shared "
165+
"and how developers choose to share their works. "
166+
"See more at [SPDX License List]"
167+
"(https://spdx.org/licenses/)",
203168
)
204169

205170

206171
def plot_totals_by_restriction(args):
207172
"""
208-
Create plots for the languages with highest usage of latest tools
173+
Create plots showing totals by restriction
209174
"""
210175
LOGGER.info(plot_totals_by_restriction.__doc__.strip())
211176
file_path = shared.path_join(
@@ -240,48 +205,22 @@ def plot_totals_by_restriction(args):
240205
SECTION,
241206
title,
242207
image_path,
243-
"Plots showing totals by restrictions.",
244-
)
245-
246-
247-
def plot_totals_by_rights_reserved(args):
248-
"""
249-
Create plots for the languages with highest usage of latest tools
250-
"""
251-
LOGGER.info(plot_totals_by_rights_reserved.__doc__.strip())
252-
file_path = shared.path_join(
253-
PATHS["data_2-process"],
254-
"github_totals_by_rights_reserved.csv",
255-
)
256-
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
257-
name_label = "Category"
258-
data_label = "Count"
259-
data = pd.read_csv(file_path, index_col=name_label)
260-
data.sort_values(name_label, ascending=False, inplace=True)
261-
title = "Totals by Rights Reserved vs No Rights Reserved"
262-
plt = plot.combined_plot(
263-
args=args,
264-
data=data,
265-
title=title,
266-
name_label=name_label,
267-
data_label=data_label,
268-
)
269-
270-
image_path = shared.path_join(
271-
PATHS["data_phase"], "github_rights_reserved.png"
272-
)
273-
LOGGER.info(f"image file: {image_path.replace(PATHS['repo'], '.')}")
274-
if args.enable_save:
275-
# Create the directory if it does not exist
276-
os.makedirs(PATHS["data_phase"], exist_ok=True)
277-
plt.savefig(image_path)
278-
279-
shared.update_readme(
280-
args,
281-
SECTION,
282-
title,
283-
image_path,
284-
"Plots showing totals by rights reserved vs No rights reserved.",
208+
"Plots showing totals by different levels of restrictions. "
209+
"Public domain includes works released under CC0, 0BSD and Unlicense "
210+
"meaning developers have waived all their rights to the software, "
211+
"allowing anyone to freely use, modify, and distribute the code "
212+
"without restriction. "
213+
"See more at "
214+
"[Public-domain-equivalent license]"
215+
"(https://en.wikipedia.org/wiki/Public-domain-equivalent_license). "
216+
"Permissive contains works under MIT-0 and CC BY 4.0, which "
217+
"allow users to reuse the code with some conditions and attribution. "
218+
"See more at [Permissive license]"
219+
"(https://en.wikipedia.org/wiki/Permissive_software_license). "
220+
"Copyleft contains works under CC BY-SA 4.0, "
221+
"which requires any derivative works to be licensed "
222+
"under the same terms. "
223+
"See more at [Copyleft](https://en.wikipedia.org/wiki/Copyleft).",
285224
)
286225

287226

@@ -290,10 +229,8 @@ def main():
290229
shared.paths_log(LOGGER, PATHS)
291230
shared.git_fetch_and_merge(args, PATHS["repo"])
292231
github_intro(args)
232+
plot_totals_by_license_type(args)
293233
plot_totals_by_restriction(args)
294-
plot_totals_by_code_license(args)
295-
Plot_by_license_type(args)
296-
plot_totals_by_rights_reserved(args)
297234

298235
# Add and commit changes
299236
args = shared.git_add_and_commit(

0 commit comments

Comments
 (0)