Skip to content

Commit 821a8e2

Browse files
committed
Applied to IEEE VIS dataset
Also test a power scale for font sizes to highlight significant authors
1 parent 65c5228 commit 821a8e2

5 files changed

Lines changed: 150 additions & 66 deletions

File tree

data/IEEE VIS papers 1990-2016 - Main dataset.csv

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

data/emptywheel.csv

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

index.html

Lines changed: 10 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -5,77 +5,24 @@
55
<meta http-equiv="X-UA-Compatible" content="ie=edge">
66
<title>WordStream</title>
77
<script src="./lib/d3.min.js"></script>
8+
<script src="./lib/loaddata.js"></script>
89
<script src="./lib/d3.layout.wordstream.js"></script>
910
<script src="./test/testutil.js"></script>
1011
</head>
1112
<body>
1213
<script>
13-
var url = "./data/emptywheel.csv";
14-
var topics = [];
15-
d3.csv(url, function(error, rawData) {
16-
if (error) throw error;
17-
var inputFormat = d3.time.format('%Y-%m-%dT%H:%M:%S');
18-
var outputFormat = d3.time.format('%b %Y');
19-
topics = d3.keys(rawData[0]).slice(2, 6);
20-
//Filter and take only dates in 2013
21-
rawData = rawData.filter(function(d){
22-
var time = inputFormat.parse(d.time);
23-
var starDate = inputFormat.parse('2014-01-01T00:00:00');
24-
var endDate = inputFormat.parse('2014-07-01T00:00:00');
25-
return time >= starDate && time < endDate;
26-
});
27-
var data = {};
28-
d3.map(rawData, function(d, i){
29-
var date = inputFormat.parse(d.time);
30-
var date = outputFormat(date);
31-
topics.forEach(topic => {
32-
if(!data[date]) data[date] = {};
33-
data[date][topic] += data[date][topic] ? ('|' +d[topic]): (d[topic]);
34-
});
35-
});
36-
var data = d3.keys(data).map(function(date, i){
37-
var words = {};
38-
topics.forEach(topic => {
39-
var raw = {};
40-
raw[topic] = data[date][topic].split('|');
41-
//Count word frequencies
42-
var counts = raw[topic].reduce(function(obj, word){
43-
if(!obj[word]){
44-
obj[word] = 0;
45-
}
46-
obj[word]++;
47-
return obj;
48-
}, {});
49-
//Convert to array of objects
50-
words[topic] = d3.keys(counts).map(function(d){
51-
return{
52-
text: d,
53-
frequency: counts[d],
54-
topic: topic
55-
}
56-
}).sort(function(a, b){//sort the terms by frequency
57-
return b.frequency-a.frequency;
58-
}).filter(function(d){return d.text; })//filter out empty words
59-
.slice(0, 30);
60-
});
61-
return {
62-
date: date,
63-
words: words
64-
}
65-
}).sort(function(a, b){//sort by date
66-
return outputFormat.parse(a.date) - outputFormat.parse(b.date);
67-
});
68-
draw(data);
69-
});
14+
//loadEmptyWheelData(draw);
15+
loadIEEEVisData(draw);
7016
function draw(data){
7117
//Layout data
72-
var width = 720, height = 300;
18+
var width = 800, height = 300;
7319
var interpolation = "cardinal";
7420
var axisPadding = 10;
7521
var margins = {left: 20, top: 20, right: 10, bottom: 30};
7622
var ws = d3.layout.wordStream()
7723
.size([width, height])
7824
.interpolate(interpolation)
25+
.fontScale(d3.scale.pow().exponent(4))
7926
.data(data);
8027
var boxes = ws.boxes();
8128

@@ -146,7 +93,6 @@
14693
var termColorMap = d3.scale.ordinal()
14794
.domain(uniqueTerms)
14895
.range(c20.range());
149-
15096
mainGroup.selectAll('g').data(allWords).enter().append('g')
15197
.attr({
15298
transform: function(d){return 'translate('+d.x+', '+d.y+')rotate('+d.rotate+')';}
@@ -168,6 +114,7 @@
168114
//Highlight
169115
mainGroup.selectAll('text').on('mouseenter', function(){
170116
var thisText = d3.select(this);
117+
thisText.style('cursor', 'pointer');
171118
prevColor = thisText.attr('fill');
172119
var text = thisText.text();
173120
var topic = thisText.attr('topic');
@@ -181,6 +128,7 @@
181128
});
182129
mainGroup.selectAll('text').on('mouseout', function(){
183130
var thisText = d3.select(this);
131+
thisText.style('cursor', 'default');
184132
var text = thisText.text();
185133
var topic = thisText.attr('topic');
186134
var allTexts = mainGroup.selectAll('text').filter(t =>{
@@ -236,6 +184,9 @@
236184
transform: function(d, i){return 'translate('+thePoint.x+','+(thePoint.y0+thePoint.y-fontSize/2)+')';},
237185
});
238186
});
187+
//Add the first and the last points
188+
points[0].y = points[1].y;//First point
189+
points[points.length-1].y = points[points.length-2].y;//Last point
239190
//Append stream
240191
wordStreamG.append('path')
241192
.datum(points)

lib/d3.layout.wordstream.js

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ d3.layout.wordStream = function(){
5252

5353
//#region helper functions
5454
function buildFontScale(data){
55+
5556
var topics = d3.keys(data[0].words);
5657
//#region scale for the font size.
5758
var maxFrequency = 0;
@@ -339,6 +340,9 @@ d3.layout.wordStream = function(){
339340
c.fillText(d.text, 0, 0);
340341
if (d.padding) c.lineWidth = 2 * d.padding, c.strokeText(d.text, 0, 0);
341342
c.restore();
343+
if(w<=0 || typeof w === "undefined"){
344+
debugger
345+
}
342346
d.width = w;
343347
d.height = h;
344348
d.x = x;
@@ -365,12 +369,12 @@ d3.layout.wordStream = function(){
365369
h = d.height,
366370
x = d.x,
367371
y = d.y;
368-
var pixels = c.getImageData(d.x, d.y, d.width, d.height).data;
369-
370-
d.sprite = Array();
371-
for(var i = 0; i<<2 < pixels.length; i++){
372-
d.sprite.push(pixels[i<<2]);
373-
}
372+
373+
var pixels = c.getImageData(d.x, d.y, d.width, d.height).data;
374+
d.sprite = Array();
375+
for(var i = 0; i<<2 < pixels.length; i++){
376+
d.sprite.push(pixels[i<<2]);
377+
}
374378
}
375379
});
376380
}

lib/loaddata.js

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/**
 * Loads "./data/emptywheel.csv", keeps the rows dated in the first half of
 * 2013, groups them by month and passes the most frequent terms of every
 * topic to the given callback.
 *
 * Each topic cell is read as a '|'-separated list of terms.
 *
 * @param {function(Array)} draw - called once with an array, sorted by date, of
 *     {date: 'Mon YYYY', words: {topic: [{text, frequency, topic}, ...]}};
 *     every word list is sorted by descending frequency and holds at most
 *     30 entries.
 * @throws rethrows the error reported by d3.csv when the file cannot be loaded.
 */
function loadEmptyWheelData(draw){
    var url = "./data/emptywheel.csv";
    var maxTermsPerTopic = 30;

    //Count the non-empty terms and return the most frequent ones as word objects
    function rankTerms(terms, topic){
        //No prototype, so terms such as "constructor" are counted like any other
        var counts = Object.create(null);
        terms.forEach(function(term){
            if(!term) return;//filter out empty words
            counts[term] = (counts[term] || 0) + 1;
        });
        return Object.keys(counts).map(function(term){
            return {
                text: term,
                frequency: counts[term],
                topic: topic
            };
        }).sort(function(a, b){//sort the terms by frequency
            return b.frequency - a.frequency;
        }).slice(0, maxTermsPerTopic);
    }

    d3.csv(url, function(error, rawData) {
        if (error) throw error;
        var inputFormat = d3.time.format('%Y-%m-%dT%H:%M:%S');
        var outputFormat = d3.time.format('%b %Y');
        //The topics are the properties at index 2 to 5 of the first row
        var topics = d3.keys(rawData[0]).slice(2, 6);
        //Take only the dates in the first half of 2013 (end date is exclusive)
        var startDate = inputFormat.parse('2013-01-01T00:00:00');
        var endDate = inputFormat.parse('2013-07-01T00:00:00');

        //Collect the terms in arrays: appending to a string with += would
        //prefix the first term of every group with the text "undefined"
        var termsByDate = {};
        rawData.forEach(function(d){
            var time = inputFormat.parse(d.time);
            if(!(time >= startDate && time < endDate)) return;
            var date = outputFormat(time);
            if(!termsByDate[date]) termsByDate[date] = {};
            topics.forEach(function(topic){
                if(!termsByDate[date][topic]) termsByDate[date][topic] = [];
                if(d[topic] == null) return;//row without this column
                d[topic].split('|').forEach(function(term){
                    termsByDate[date][topic].push(term);
                });
            });
        });

        var data = d3.keys(termsByDate).map(function(date){
            var words = {};
            topics.forEach(function(topic){
                words[topic] = rankTerms(termsByDate[date][topic], topic);
            });
            return {
                date: date,
                words: words
            };
        }).sort(function(a, b){//sort by date
            return outputFormat.parse(a.date) - outputFormat.parse(b.date);
        });
        draw(data);
    });
}
60+
/**
 * Loads the IEEE VIS papers CSV, keeps the papers whose Year is between 2009
 * and 2016 (inclusive), groups them by year and passes the most frequent
 * terms of every topic to the given callback.
 *
 * Only the 'Authors' topic is enabled. A topic with an entry in `splitters`
 * is split on that separator, 'Title' is stripped of stop words and split
 * into words (double-quoted phrases stay together), and any other topic
 * contributes its whole cell as a single term.
 *
 * @param {function(Array)} draw - called once with an array, sorted by year, of
 *     {date: year, words: {topic: [{text, frequency, topic}, ...]}};
 *     every word list is sorted by descending frequency and holds at most
 *     30 entries.
 * @throws rethrows the error reported by d3.csv when the file cannot be loaded.
 */
function loadIEEEVisData(draw){
    var url = "./data/IEEE VIS papers 1990-2016 - Main dataset.csv";
    var stopwords = "i|me|my|myself|we|our|ours|ourselves|you|your|yours|yourself|yourselves|he|him|his|himself|she|her|hers|herself|it|its|itself|they|them|their|theirs|themselves|what|which|who|whom|this|that|these|those|am|is|are|was|were|be|been|being|have|has|had|having|do|does|did|doing|a|an|the|and|but|if|or|because|as|until|while|of|at|by|for|with|about|against|between|into|through|during|before|after|above|below|to|from|up|down|in|out|on|off|over|under|again|further|then|once|here|there|when|where|why|how|all|any|both|each|few|more|most|other|some|such|no|nor|not|only|own|same|so|than|too|very|s|t|can|will|just|don|should|now";
    //Built once instead of once per row
    var stopwordPattern = new RegExp('\\b('+stopwords+')\\b', 'g');
    var titleTokenPattern = /("[^"]+"|[^"\s]+)/g;
    var maxTermsPerTopic = 30;
    var topics = ['Authors'];
    //topics = ['PaperType', 'Authors', 'Keywords'];
    var splitters = {
        'Authors': ';',
        'Keywords': ';'
    };

    //Turn one CSV cell into its list of terms, without modifying the row
    function extractTerms(topic, value){
        if(value == null) return [];//row without this column
        if(topic === 'Title'){
            //Remove the stop words, then the multiple spaces they leave behind
            var cleaned = value.replace(stopwordPattern, '').replace(/\s+/g, ' ');
            //match() returns null when nothing is left
            return cleaned.match(titleTokenPattern) || [];
        }
        //An undefined splitter keeps the whole cell as one term
        return value.split(splitters[topic]);
    }

    //Count the non-empty terms and return the most frequent ones as word objects
    function rankTerms(terms, topic){
        //No prototype, so terms such as "constructor" are counted like any other
        var counts = Object.create(null);
        terms.forEach(function(term){
            if(!term) return;//filter out empty words
            counts[term] = (counts[term] || 0) + 1;
        });
        return Object.keys(counts).map(function(term){
            return {
                text: term,
                frequency: counts[term],
                topic: topic
            };
        }).sort(function(a, b){//sort the terms by frequency
            return b.frequency - a.frequency;
        }).slice(0, maxTermsPerTopic);
    }

    d3.csv(url, function(error, rawData) {
        if (error) throw error;

        //Collect the terms in arrays: appending to a string with += would
        //prefix the first term of every year with the text "undefined"
        var termsByYear = {};
        rawData.forEach(function(d){
            var year = +d.Year;
            if(!(year >= 2009 && year <= 2016)) return;
            if(!termsByYear[year]) termsByYear[year] = {};
            topics.forEach(function(topic){
                if(!termsByYear[year][topic]) termsByYear[year][topic] = [];
                extractTerms(topic, d[topic]).forEach(function(term){
                    termsByYear[year][topic].push(term);
                });
            });
        });

        var data = d3.keys(termsByYear).map(function(year){
            var words = {};
            topics.forEach(function(topic){
                words[topic] = rankTerms(termsByYear[year][topic], topic);
            });
            return {
                date: year,
                words: words
            };
        }).sort(function(a, b){//sort by date
            return a.date - b.date;
        });
        draw(data);
    });
}

0 commit comments

Comments
 (0)