티스토리 뷰

목록을 만드는 이 코드를 작성했습니다. 트윗이 들어올 때마다 목록을 확인하고, 해당 텍스트가 목록에 없으면 목록에 추가합니다.

# Demo: keep only the first occurrence of each tweet text, preserving order.

# Defines a list - It stores all unique tweets
tweetChecklist = []

# All your tweets. I represent them as a list to test the code
AllTweets = ["Hello", "HelloFoo", "HelloBar", "Hello", "hello", "Bye"]

# Goes over all "tweets"
for current_tweet in AllTweets:
    # If tweet doesn't exist in the list
    if current_tweet not in tweetChecklist:
        tweetChecklist.append(current_tweet)
        # Do what you want with this tweet, it won't appear two times...

# Prints ['Hello', 'HelloFoo', 'HelloBar', 'hello', 'Bye']
# Note that the second Hello doesn't show up - It's what you want
# However, it's case sensitive ("Hello" and "hello" are both kept).
print(tweetChecklist)
# Clear the list so the next batch starts with no remembered tweets
tweetChecklist = []
                                        

제 해결 방법을 적용하면 코드는 다음과 같은 모습이 될 것이라고 생각합니다.

from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
import json
import csv

# Texts of the tweets handled so far; used to recognise duplicates.
# Empty it again once all tweets have been fetched.
tweetChecklist = list()

# Credentials handed to OAuthHandler / set_access_token further down.
# They are blank here and must be filled in before running.
ckey, csecret = '', ''
atoken, asecret = '', ''

class listener(StreamListener):
    """Stream listener that stores every unique tweet as one CSV row.

    Each decoded payload is appended to ``backup.txt``. A tweet whose text is
    not yet in the module-level ``tweetChecklist`` is remembered there and
    appended to ``tweets3.csv``.
    """

    @staticmethod
    def _to_utf8(value):
        """Return *value* as a UTF-8 encoded string.

        Text values are encoded directly; anything else (ids, counters,
        booleans, None) goes through ``str()`` first. Calling ``str()`` on
        non-ASCII unicode text raises UnicodeEncodeError on Python 2, which
        is why text must not take that path.
        """
        text = value if isinstance(value, type(u"")) else str(value)
        return text.encode("utf-8")

    def on_data(self, data):
        """Handle one raw message delivered by the stream.

        Args:
            data: JSON-encoded payload.

        Returns:
            True in every case.
            NOTE(review): tweepy 3.x is understood to disconnect only when a
            listener callback returns False - confirm for the installed
            version.
        """
        try:
            all_data = json.loads(data)

            # Keep a raw copy of everything received; the ``with`` block
            # closes the file, so no explicit close() is needed.
            with open("backup.txt", 'a') as backup:
                backup.write(str(all_data) + "\n")

            user = all_data["user"]
            text = self._to_utf8(all_data["text"])
            tweet_id = self._to_utf8(all_data["id"])
            timestamp = self._to_utf8(all_data["timestamp_ms"])
            sn = self._to_utf8(user["screen_name"])
            user_id = self._to_utf8(user["id"])
            create = self._to_utf8(all_data["created_at"])
            follower = self._to_utf8(user["followers_count"])
            # NOTE(review): this reads user["following"]; if the number of
            # accounts the user follows was intended, the field is probably
            # "friends_count" - confirm against the Twitter user object.
            following = self._to_utf8(user["following"])
            status = self._to_utf8(user["statuses_count"])

            # Only tweets whose text has not been seen before are stored.
            if text not in tweetChecklist:
                # Remember the text first so later duplicates are skipped.
                tweetChecklist.append(text)

                contentlist = [
                    text,
                    tweet_id,
                    timestamp,
                    sn,
                    user_id,
                    create,
                    follower,
                    following,
                    status,
                ]
                print(contentlist)

                # NOTE(review): binary append mode with pre-encoded fields is
                # the Python 2 csv convention; on Python 3 the csv module
                # expects a text-mode file opened with newline=''.
                with open("tweets3.csv", 'ab') as f:
                    wrt = csv.writer(f, dialect='excel')
                    try:
                        wrt.writerow(contentlist)
                    except UnicodeEncodeError:
                        # Best effort: skip a row that cannot be encoded.
                        return True
            return True
        except Exception as e:
            # Exception rather than BaseException, so Ctrl-C / SystemExit can
            # still stop the script; any other failure is reported and the
            # listener pauses briefly before the next message.
            print('failed on data %s %s' % (type(e), str(e)))
            time.sleep(3)
            return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print("Error status:" + str(status))


# Authenticate and start streaming English tweets that mention "zikavirus".
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=["zikavirus"], languages=['en'])
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                


출처
https://stackoverflow.com/questions/39920097
댓글
공지사항
Total
Today
Yesterday
«   2025/06   »
1 2 3 4 5 6 7
8 9 10 11 12 13 14
15 16 17 18 19 20 21
22 23 24 25 26 27 28
29 30