Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
RedditGraph
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
OpenSource
RedditGraph
Commits
e569883b
Commit
e569883b
authored
May 14, 2020
by
Mark Winter
Browse files
Options
Downloads
Patches
Plain Diff
Run partial checkpointed data with Laplacian Eigenmaps
parent
6f1aa502
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
.gitignore
+2
-0
2 additions, 0 deletions
.gitignore
reddit_graph.m
+72
-15
72 additions, 15 deletions
reddit_graph.m
reddit_run_partial.m
+22
-0
22 additions, 0 deletions
reddit_run_partial.m
reddit_testing.m
+1
-1
1 addition, 1 deletion
reddit_testing.m
with
97 additions
and
16 deletions
.gitignore
+
2
−
0
View file @
e569883b
*.zip
*.mat
This diff is collapsed.
Click to expand it.
reddit_graph.m
+
72
−
15
View file @
e569883b
function
G
=
reddit_graph
(
tokenObj
)
function
[
G
,
S
,
links
,
link_map
]
=
reddit_graph
(
tokenObj
)
%% Get a 1-hr token
if
(
~
exist
(
'tokenObj'
,
'var'
)
||
isempty
(
tokenObj
)
)
credStruct
=
load_reddit_creds
(
'reddit_creds.json'
);
...
...
@@ -8,32 +8,45 @@ function G = reddit_graph(tokenObj)
% A few constants
graph_params
=
struct
(
'max_depth'
,{
3
},
'link_limit'
,{
10
},
'comment_limit'
,{
10
},
'ignore_users'
,{{
'[deleted]'
;
'AutoModerator'
}});
%% Load first
100
/r/<subreddit>/hot entries and start building user graph
%% Load first
link_limit
/r/<subreddit>/hot entries and start building user graph
subreddit
=
'math'
;
req
=
sprintf
(
'r/%s/hot?limit=%d'
,
subreddit
,
graph_params
.
link_limit
);
r
=
reddit_query
(
req
,
tokenObj
);
%%
%% Major structures for checkpointing and graph-linkup
% This will be the user relationship graph
G
=
graph
();
% chk_links = struct('id',{}, 'subreddit',{}, 'user',{}, 'depth',{});
% Sparse bipartite map from Reddit posts (links) to user-graph edgeIDs
S
=
sparse
([]);
% Also, keep search-ordered listing of all Reddit links
links
=
struct
(
'id'
,{},
'subreddit'
,{},
'user'
,{},
'depth'
,{});
% Map Reddit post (linkIDs) to post titles
link_map
=
containers
.
Map
(
'KeyType'
,
'char'
,
'ValueType'
,
'char'
);
%%
%%
Seed the link listing with the popular entries
popular_links
=
r
.
data
.
children
;
chk_links
=
create_new_links
(
popular_links
,
0
,
link_map
,
graph_params
);
seed_links
=
create_new_links
(
popular_links
,
0
,
link_map
,
graph_params
);
[
links
,
S
]
=
add_links
(
seed_links
,
links
,
S
);
% Run through all listings breadth-first
while
(
~
isempty
(
chk_links
)
)
link
=
chk_links
(
1
);
[
new_links
,
G
]
=
process_link
(
link
,
G
,
link_map
,
graph_params
,
tokenObj
);
link_idx
=
1
;
while
(
link_idx
<=
length
(
links
)
)
[
new_links
,
G
,
S
]
=
process_link
(
links
,
link_idx
,
G
,
S
,
link_map
,
graph_params
,
tokenObj
);
[
links
,
S
]
=
add_links
(
new_links
,
links
,
S
);
link_idx
=
link_idx
+
1
;
chk_links
=
[
chk_links
(
2
:
end
);
new_links
]
;
reddit_search_checkpoint
(
G
,
S
,
links
,
link_map
,
link_idx
)
;
end
end
function
[
new_links
,
G
]
=
process_link
(
link
,
G
,
link_map
,
graph_params
,
tokenObj
)
%% Process a Reddit post (find all users associated with post and find links associated with those users)
function
[
new_links
,
G
,
S
]
=
process_link
(
links
,
link_idx
,
G
,
S
,
link_map
,
graph_params
,
tokenObj
)
new_links
=
[];
link
=
links
(
link_idx
);
if
(
link
.
depth
>
graph_params
.
max_depth
)
return
;
...
...
@@ -46,7 +59,7 @@ function [new_links, G] = process_link(link, G, link_map, graph_params, tokenObj
link_users
=
link_users
(
~
bIgnoreUsers
);
[
new_users
,
G
]
=
update_graph
(
link_users
,
G
);
[
new_users
,
G
,
S
]
=
update_graph
(
link_users
,
link_idx
,
G
,
S
);
%% Process users to find new links
% Don't bother adding new links if we're at max_depth
...
...
@@ -57,6 +70,28 @@ function [new_links, G] = process_link(link, G, link_map, graph_params, tokenObj
new_links
=
add_user_links
(
new_users
,
link
.
depth
+
1
,
link_map
,
graph_params
,
tokenObj
);
end
%% Checkpoint this search to a file for partial results checks and for restarting
function reddit_search_checkpoint(G, S, links, link_map, link_idx)
    % Persist the current search state so a partial run can be inspected
    % offline or resumed later. Saved fields: G (user graph), S (post-to-edge
    % bipartite map), links (search-ordered link listing), link_map
    % (linkID -> title map), link_idx (position of next link to process).
    save_struct = struct( ...
        'G',        {G}, ...
        'S',        {S}, ...
        'links',    {links}, ...
        'link_map', {link_map}, ...
        'link_idx', {link_idx});

    keep_count = 3;                       % retention limit for checkpoint files
    file_prefix = 'reddit_chkpoint';

    % Timestamped filename: alphabetical order matches chronological order.
    stamp = datestr(now, 'yyyy-mm-dd_HH-MM-SS');
    out_name = sprintf('%s_%s.mat', file_prefix, stamp);
    save(out_name, '-struct', 'save_struct');

    % Prune the oldest checkpoints beyond the retention limit. dir() returns
    % names alphabetically, so the leading entries are the oldest.
    found = dir([file_prefix '_*.mat']);
    if (length(found) > keep_count)
        stale = {found(1:(end - keep_count)).name};
        delete(stale{:});
    end
end
%% Find all users who posted or commented on current post (linkID)
function
link_users
=
find_link_users
(
link
,
tokenObj
)
% Always start with the original post user
link_users
=
{
link
.
user
};
...
...
@@ -78,6 +113,7 @@ function link_users = find_link_users(link, tokenObj)
link_users
=
unique
([
link_users
;
comment_users
],
'stable'
);
end
%% Add new posts (linkIDs) from new users discovered in current post being processed
function
new_links
=
add_user_links
(
new_users
,
next_depth
,
link_map
,
graph_params
,
tokenObj
)
new_links
=
[];
for
i
=
1
:
length
(
new_users
)
...
...
@@ -86,6 +122,7 @@ function new_links = add_user_links(new_users, next_depth, link_map, graph_param
end
end
%% Get a limited number of posts and comments a user has created
function
new_links
=
query_user_links
(
user_id
,
next_depth
,
link_map
,
graph_params
,
tokenObj
)
new_links
=
[];
...
...
@@ -127,6 +164,17 @@ function new_links = query_user_links(user_id, next_depth, link_map, graph_param
new_links
=
create_new_links
(
posts_list
,
next_depth
,
link_map
,
graph_params
);
end
%% Append unexplored links to the master listing and widen the bipartite map
% NOTE: Callers guarantee new_links contains only previously-unseen unique
% entries; deduplication is handled by create_new_links().
function [links, S] = add_links(new_links, links, S)
    % Grow the search-ordered link list.
    links = [links; new_links];
    % Add one all-zero column per new link; the row count (graph edges)
    % is left unchanged.
    S = [S, zeros(size(S, 1), length(new_links))];
end
%% Create link structures for unexplored new links
function
new_links
=
create_new_links
(
link_listing
,
depth
,
link_map
,
graph_params
)
new_links
=
[];
...
...
@@ -166,7 +214,8 @@ function new_links = create_new_links(link_listing, depth, link_map, graph_param
end
end
function
[
new_users
,
G
]
=
update_graph
(
link_users
,
G
)
%% Add users and edges to user-graph for a particular reddit post
function
[
new_users
,
G
,
S
]
=
update_graph
(
link_users
,
link_idx
,
G
,
S
)
nodeIDs
=
zeros
(
length
(
link_users
),
1
);
if
(
G
.
numnodes
>
0
)
nodeIDs
=
findnode
(
G
,
link_users
);
...
...
@@ -201,7 +250,15 @@ function [new_users, G] = update_graph(link_users, G)
% Add new edges
newS
=
s
(
~
bValidEdges
);
newT
=
t
(
~
bValidEdges
);
newEdgeIDs
=
[];
if
(
~
isempty
(
newS
)
)
G
=
addedge
(
G
,
newS
,
newT
,
1
);
newEdgeIDs
=
findedge
(
G
,
newS
,
newT
);
end
% Create mapping between linkID and edgeIDs
num_links
=
size
(
S
,
2
);
all_edges
=
[
edgeIDs
;
newEdgeIDs
];
S
=
[
S
;
zeros
(
length
(
newEdgeIDs
),
num_links
)];
S
(
all_edges
,
link_idx
)
=
1
;
end
This diff is collapsed.
Click to expand it.
reddit_run_partial.m
0 → 100644
+
22
−
0
View file @
e569883b
%% Resume from the newest checkpoint on disk
% Loads G, S, links, link_map and link_idx into the workspace.
chkpt_prefix = 'reddit_chkpoint';
available = dir([chkpt_prefix '_*.mat']);
if (isempty(available))
    warning('No checkpoints found in current directory!');
    return;
end
% dir() lists names alphabetically; the timestamped naming scheme makes the
% last entry the most recent checkpoint.
load(available(end).name);

%% Plot the graph to see what it currently looks like
plot(G);

%% Create a weighted adjacency matrix from the graph edges
nn = numnodes(G);
[src, dst] = findedge(G);
A = sparse(src, dst, G.Edges.Weight, nn, nn);
A = max(A, A');   % symmetrize for the undirected adjacency

%% Clustering using an arbitrary k (Laplacian Eigenmaps)
idx = Cluster.kmeans_le(A, 10);
This diff is collapsed.
Click to expand it.
reddit_testing.m
+
1
−
1
View file @
e569883b
...
...
@@ -3,7 +3,7 @@ credStruct = load_reddit_creds('reddit_creds.json');
tokenObj
=
reddit_auth
(
credStruct
);
%% Generate a user-graph from Reddit
G
=
reddit_graph
(
tokenObj
);
[
G
,
S
,
links
,
link_map
]
=
reddit_graph
(
tokenObj
);
%% Create a weighted adjacency matrix
nn
=
numnodes
(
G
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment