Crawler works

This commit is contained in:
farhan 2020-10-29 03:08:30 +00:00
parent d5a20f836e
commit 5ce495b81c

View File

@ -104,6 +104,15 @@ func parseCommand(c net.Conn) {
} }
*/ */
// AppendIfMissing returns hay unchanged when needle already occurs in it;
// otherwise it returns hay with needle appended as the last element.
func AppendIfMissing(hay []string, needle string) []string {
	present := false
	for i := 0; i < len(hay) && !present; i++ {
		present = hay[i] == needle
	}
	if !present {
		hay = append(hay, needle)
	}
	return hay
}
func StartInstancePoll(endpoint string, min_id string, reportPostChan chan ReportPost, pollMessageChan chan PollMessage, reportInstanceChan chan ReportInstance) { func StartInstancePoll(endpoint string, min_id string, reportPostChan chan ReportPost, pollMessageChan chan PollMessage, reportInstanceChan chan ReportInstance) {
p := bluemonday.NewPolicy() p := bluemonday.NewPolicy()
newposts := make([]ReportPost, 0) newposts := make([]ReportPost, 0)
@ -124,6 +133,7 @@ func StartInstancePoll(endpoint string, min_id string, reportPostChan chan Repor
body, err := ioutil.ReadAll(resp.Body) body, err := ioutil.ReadAll(resp.Body)
err = json.Unmarshal(body, &newposts) err = json.Unmarshal(body, &newposts)
if err != nil { if err != nil {
fmt.Println("Unmarshal 3");
// Perhaps get rid of this if-condition? // Perhaps get rid of this if-condition?
if resp.StatusCode >= 400 && resp.StatusCode < 500 { if resp.StatusCode >= 400 && resp.StatusCode < 500 {
reportInstanceChan <- ReportInstance{endpoint, endpoint, resp.StatusCode} reportInstanceChan <- ReportInstance{endpoint, endpoint, resp.StatusCode}
@ -132,20 +142,25 @@ func StartInstancePoll(endpoint string, min_id string, reportPostChan chan Repor
} else { } else {
reportInstanceChan <- ReportInstance{endpoint, endpoint, UNMARSHAL_ERROR} reportInstanceChan <- ReportInstance{endpoint, endpoint, UNMARSHAL_ERROR}
} }
log.Fatal(err) //log.Fatal(err)
return return
} }
newinstances := make([]string, 0)
numposts := 0 numposts := 0
for _, newpost := range newposts { for _, newpost := range newposts {
posthash := sha1.New() posthash := sha1.New()
if strings.Contains(newpost.Account.Acct, "@") == false { at_sign := strings.Index(newpost.Account.Acct, "@")
if at_sign == -1 {
at_sign = len(newpost.Account.Acct)
newpost.Account.Acct += "@" + endpoint newpost.Account.Acct += "@" + endpoint
} }
// Calculate the post hash // Calculate the post hash
fmt.Fprint(posthash, newpost.Url) fmt.Fprint(posthash, newpost.Url)
fmt.Fprint(posthash, newpost.Content) fmt.Fprint(posthash, newpost.StrippedContent)
fmt.Fprint(posthash, newpost.Account.Acct) fmt.Fprint(posthash, newpost.Account.Acct)
fmt.Fprint(posthash, newpost.Account.Display_name) fmt.Fprint(posthash, newpost.Account.Display_name)
fmt.Fprint(posthash, newpost.Account.Url) fmt.Fprint(posthash, newpost.Account.Url)
@ -160,8 +175,22 @@ func StartInstancePoll(endpoint string, min_id string, reportPostChan chan Repor
min_id = newpost.Id min_id = newpost.Id
} }
numposts = numposts + 1 numposts = numposts + 1
newinstance := newpost.Account.Acct[at_sign+1:]
newinstances = AppendIfMissing(newinstances, newinstance)
} }
for _, newinstance := range newinstances {
var q ReportInstance
q.from = endpoint
q.endpoint = newinstance
q.status = NEW_INSTANCE
reportInstanceChan <- q
}
fmt.Println(newinstances)
pollMessageChan <- PollMessage{endpoint, resp.StatusCode, min_id, numposts} pollMessageChan <- PollMessage{endpoint, resp.StatusCode, min_id, numposts}
} }
@ -180,6 +209,7 @@ func StartGetPeers(endpoint string, reportInstanceChan chan ReportInstance) {
err = json.Unmarshal([]byte(body), &newpeers) err = json.Unmarshal([]byte(body), &newpeers)
if err != nil { if err != nil {
fmt.Println("Unmarshal 1");
log.Fatal(err) log.Fatal(err)
reportInstanceChan <- ReportInstance{endpoint, endpoint, UNMARSHAL_ERROR} reportInstanceChan <- ReportInstance{endpoint, endpoint, UNMARSHAL_ERROR}
return return
@ -229,8 +259,8 @@ func GetNodeInfo(endpoint string, nodeinfo *NodeInfo) {
body, err := ioutil.ReadAll(resp.Body) body, err := ioutil.ReadAll(resp.Body)
err = json.Unmarshal(body, &nodeinfo) err = json.Unmarshal(body, &nodeinfo)
fmt.Println("Body: " + string(body))
if err != nil { if err != nil {
fmt.Println("Unmarshal 2");
return return
} }
} }
@ -285,12 +315,10 @@ func writePost(pool *pgxpool.Pool, reportpost ReportPost) {
os.Exit(1) // For now I want this to die and learn why it failed os.Exit(1) // For now I want this to die and learn why it failed
return return
} }
fmt.Println("Properly executed")
} }
func SuspendInstance(suspendinstance ReportInstance, runninginstances *[]RunningInstance) { func SuspendInstance(suspendinstance ReportInstance, runninginstances *[]RunningInstance) {
fmt.Println("Suspend")
for _, runninginstance := range *runninginstances { for _, runninginstance := range *runninginstances {
if runninginstance.endpoint == suspendinstance.endpoint { if runninginstance.endpoint == suspendinstance.endpoint {
runninginstance.status = suspendinstance.status runninginstance.status = suspendinstance.status
@ -358,6 +386,7 @@ func main() {
go writePost(pool, v) go writePost(pool, v)
case w := <-reportInstanceChan: // Start or suspend instance case w := <-reportInstanceChan: // Start or suspend instance
if w.status == NEW_INSTANCE { if w.status == NEW_INSTANCE {
fmt.Println("NEW INSTANCE: ", w.endpoint)
NewInstance(w.endpoint, &runninginstances, reportInstanceChan, reportPostChan, pollMessageChan) NewInstance(w.endpoint, &runninginstances, reportInstanceChan, reportPostChan, pollMessageChan)
} else { } else {
SuspendInstance(w, &runninginstances) SuspendInstance(w, &runninginstances)