Twint Twitter OSINT Tool Cheat Sheet
Overview
Twint is an advanced Twitter scraping tool written in Python that scrapes tweets from Twitter profiles without using Twitter's API. It can fetch tweets, followers, following, retweets, and more while bypassing most of Twitter's rate limits. Twint is particularly useful for OSINT investigations, social media monitoring, and research.
⚠️ Legal Notice: Only use Twint for legitimate research, OSINT investigations, or authorized security testing. Respect Twitter's terms of service and applicable privacy laws.
Installation
Python pip Installation
# Install via pip
pip3 install twint
# Install development version
pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint
# Install with additional dependencies
pip3 install twint[all]
# Verify installation
twint --help
Docker Installation
# Pull Docker image
docker pull twintproject/twint
# Run with Docker
docker run -it --rm twintproject/twint
# Build from source
git clone https://github.com/twintproject/twint.git
cd twint
docker build -t twint .
# Run with volume mount
docker run -it --rm -v $(pwd)/output:/output twint
Manual Installation
# Clone repository
git clone https://github.com/twintproject/twint.git
cd twint
# Install dependencies
pip3 install -r requirements.txt
# Install package
python3 setup.py install
# Alternative: Run directly
python3 -m twint --help
Virtual Environment Setup
# Create virtual environment
python3 -m venv twint-env
source twint-env/bin/activate
# Install Twint
pip install twint
# Verify installation
twint --version
Basic Usage
Command-Line Interface
# Basic tweet scraping
twint -u username
# Scrape tweets with specific search term
twint -s "search term"
# Scrape tweets from specific user
twint -u elonmusk
# Limit number of tweets
twint -u username --limit 100
# Save to file
twint -u username -o tweets.csv --csv
# Search with date range
twint -s "cybersecurity" --since "2023-01-01" --until "2023-12-31"
Python API Usage
import twint
# Configure Twint
c = twint.Config()
c.Username = "username"
c.Limit = 100
c.Store_csv = True
c.Output = "tweets.csv"
# Run search
twint.run.Search(c)
Advanced Search Options
User-based Searches
# Get user's tweets
twint -u username
# Get user's followers
twint -u username --followers
# Get user's following
twint -u username --following
# Get user's favorites/likes
twint -u username --favorites
# Get user information
twint -u username --user-full
# Get verified users only
twint -s "search term" --verified
Content-based Searches
# Search by keyword
twint -s "cybersecurity"
# Search with hashtag
twint -s "#infosec"
# Search with multiple keywords
twint -s "cybersecurity OR infosec"
# Search for exact phrase
twint -s '"exact phrase"'
# Search excluding terms
twint -s "cybersecurity -spam"
# Search for tweets with links
twint -s "cybersecurity" --links
# Search for tweets with media
twint -s "cybersecurity" --media
Geographic and Language Filters
# Search by location
twint -s "cybersecurity" --near "New York"
# Search with specific language
twint -s "cybersecurity" --lang en
# Search with geolocation
twint -s "cybersecurity" --geo "40.7128,-74.0060,10km"
# Search popular tweets only
twint -s "cybersecurity" --popular
# Search for tweets with minimum likes
twint -s "cybersecurity" --min-likes 10
# Search for tweets with minimum retweets
twint -s "cybersecurity" --min-retweets 5
Date and Time Filters
# Search with date range
twint -s "cybersecurity" --since "2023-01-01" --until "2023-12-31"
# Search tweets from specific year
twint -s "cybersecurity" --year 2023
# Search tweets from specific hour
twint -s "cybersecurity" --hour 14
# Search tweets from today
twint -s "cybersecurity" --since $(date +%Y-%m-%d)
# Search tweets from last week
twint -s "cybersecurity" --since $(date -d '7 days ago' +%Y-%m-%d)
Output Formats and Storage
File Output Options
# Save as CSV
twint -u username -o output.csv --csv
# Save as JSON
twint -u username -o output.json --json
# Save as text file
twint -u username -o output.txt
# Custom CSV format
twint -u username --csv --output tweets.csv --custom-csv "date,time,username,tweet"
# Hide output (silent mode)
twint -u username --hide-output
# Debug mode
twint -u username --debug
Database Storage
# Store in Elasticsearch
twint -u username --elasticsearch localhost:9200
# Store in SQLite database
twint -u username --database tweets.db
# Store with custom database table
twint -u username --database tweets.db --table-tweets custom_tweets
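Database storage is also reachable from the Python API. A minimal sketch, assuming c.Database is the Config counterpart of the --database flag:
import twint

# Sketch: write results straight into an SQLite database.
# c.Database is assumed to correspond to the --database CLI flag;
# verify against your installed Twint version.
c = twint.Config()
c.Username = "username"
c.Limit = 100
c.Database = "tweets.db"
twint.run.Search(c)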
Advanced Output Configuration
import twint
# Configure advanced output
c = twint.Config()
c.Username = "username"
c.Store_csv = True
c.Output = "detailed_tweets.csv"
c.Custom_csv = ["date", "time", "username", "tweet", "replies_count", "retweets_count", "likes_count", "hashtags", "urls"]
c.Hide_output = True
# Run search
twint.run.Search(c)
Advanced Python API Usage
Basic Configuration
import twint
import pandas as pd

def scrape_user_tweets(username, limit=100):
    """Scrape tweets from a specific user"""
    c = twint.Config()
    c.Username = username
    c.Limit = limit
    c.Store_pandas = True
    c.Hide_output = True
    twint.run.Search(c)
    # Get pandas dataframe
    tweets_df = twint.storage.panda.Tweets_df
    return tweets_df

# Usage
tweets = scrape_user_tweets("elonmusk", 50)
print(f"Scraped {len(tweets)} tweets")
Advanced Search Configuration
import twint
import pandas as pd
from datetime import datetime, timedelta

def advanced_search(search_term, days_back=7, min_likes=5):
    """Advanced search with multiple filters"""
    c = twint.Config()
    # Search configuration
    c.Search = search_term
    c.Lang = "en"
    c.Min_likes = min_likes
    c.Popular_tweets = True
    # Date range (last N days)
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days_back)
    c.Since = start_date.strftime("%Y-%m-%d")
    c.Until = end_date.strftime("%Y-%m-%d")
    # Output configuration
    c.Store_pandas = True
    c.Hide_output = True
    # Run search
    twint.run.Search(c)
    # Process results
    if twint.storage.panda.Tweets_df is not None:
        tweets_df = twint.storage.panda.Tweets_df
        return tweets_df
    else:
        return pd.DataFrame()

# Usage
cybersec_tweets = advanced_search("cybersecurity", days_back=30, min_likes=10)
print(f"Found {len(cybersec_tweets)} popular cybersecurity tweets")
User Analysis Functions
import twint
import pandas as pd
from collections import Counter

class TwitterOSINT:
    def __init__(self):
        self.tweets_df = None
        self.users_df = None

    def analyze_user(self, username):
        """Comprehensive user analysis"""
        # Get user tweets
        c = twint.Config()
        c.Username = username
        c.Limit = 1000
        c.Store_pandas = True
        c.Hide_output = True
        twint.run.Search(c)
        self.tweets_df = twint.storage.panda.Tweets_df
        if self.tweets_df is not None and not self.tweets_df.empty:
            analysis = {
                'username': username,
                'total_tweets': len(self.tweets_df),
                'date_range': {
                    'earliest': self.tweets_df['date'].min(),
                    'latest': self.tweets_df['date'].max()
                },
                'engagement': {
                    'avg_likes': self.tweets_df['likes_count'].mean(),
                    'avg_retweets': self.tweets_df['retweets_count'].mean(),
                    'avg_replies': self.tweets_df['replies_count'].mean()
                },
                'top_hashtags': self.get_top_hashtags(),
                'top_mentions': self.get_top_mentions(),
                'posting_patterns': self.analyze_posting_patterns()
            }
            return analysis
        else:
            return None

    def get_top_hashtags(self, top_n=10):
        """Extract top hashtags from tweets"""
        if self.tweets_df is None:
            return []
        all_hashtags = []
        for hashtags in self.tweets_df['hashtags'].dropna():
            if hashtags:
                all_hashtags.extend(hashtags)
        return Counter(all_hashtags).most_common(top_n)

    def get_top_mentions(self, top_n=10):
        """Extract top mentions from tweets"""
        if self.tweets_df is None:
            return []
        all_mentions = []
        for mentions in self.tweets_df['mentions'].dropna():
            if mentions:
                all_mentions.extend(mentions)
        return Counter(all_mentions).most_common(top_n)

    def analyze_posting_patterns(self):
        """Analyze posting time patterns"""
        if self.tweets_df is None:
            return {}
        # Convert time to hour
        self.tweets_df['hour'] = pd.to_datetime(self.tweets_df['time']).dt.hour
        patterns = {
            'hourly_distribution': self.tweets_df['hour'].value_counts().to_dict(),
            'most_active_hour': self.tweets_df['hour'].mode().iloc[0] if not self.tweets_df['hour'].empty else None,
            'daily_tweet_count': self.tweets_df.groupby('date').size().mean()
        }
        return patterns

    def search_and_analyze(self, search_term, limit=500):
        """Search for tweets and analyze patterns"""
        c = twint.Config()
        c.Search = search_term
        c.Limit = limit
        c.Store_pandas = True
        c.Hide_output = True
        twint.run.Search(c)
        self.tweets_df = twint.storage.panda.Tweets_df
        if self.tweets_df is not None and not self.tweets_df.empty:
            analysis = {
                'search_term': search_term,
                'total_tweets': len(self.tweets_df),
                'unique_users': self.tweets_df['username'].nunique(),
                'top_users': self.tweets_df['username'].value_counts().head(10).to_dict(),
                'engagement_stats': {
                    'total_likes': self.tweets_df['likes_count'].sum(),
                    'total_retweets': self.tweets_df['retweets_count'].sum(),
                    'avg_engagement': (self.tweets_df['likes_count'] + self.tweets_df['retweets_count']).mean()
                },
                'top_hashtags': self.get_top_hashtags(),
                'sentiment_indicators': self.basic_sentiment_analysis()
            }
            return analysis
        else:
            return None

    def basic_sentiment_analysis(self):
        """Basic sentiment analysis using keyword matching"""
        if self.tweets_df is None:
            return {}
        positive_words = ['good', 'great', 'excellent', 'amazing', 'love', 'best', 'awesome']
        negative_words = ['bad', 'terrible', 'awful', 'hate', 'worst', 'horrible', 'disgusting']
        positive_count = 0
        negative_count = 0
        for tweet in self.tweets_df['tweet'].str.lower():
            if any(word in tweet for word in positive_words):
                positive_count += 1
            if any(word in tweet for word in negative_words):
                negative_count += 1
        total_tweets = len(self.tweets_df)
        return {
            'positive_tweets': positive_count,
            'negative_tweets': negative_count,
            'neutral_tweets': total_tweets - positive_count - negative_count,
            'positive_ratio': positive_count / total_tweets if total_tweets > 0 else 0,
            'negative_ratio': negative_count / total_tweets if total_tweets > 0 else 0
        }

# Usage example
osint = TwitterOSINT()

# Analyze a specific user
user_analysis = osint.analyze_user("elonmusk")
if user_analysis:
    print(f"User Analysis for {user_analysis['username']}:")
    print(f"Total tweets: {user_analysis['total_tweets']}")
    print(f"Average likes: {user_analysis['engagement']['avg_likes']:.2f}")
    print(f"Top hashtags: {user_analysis['top_hashtags'][:5]}")

# Search and analyze a topic
topic_analysis = osint.search_and_analyze("cybersecurity", limit=200)
if topic_analysis:
    print(f"\nTopic Analysis for '{topic_analysis['search_term']}':")
    print(f"Total tweets: {topic_analysis['total_tweets']}")
    print(f"Unique users: {topic_analysis['unique_users']}")
    print(f"Average engagement: {topic_analysis['engagement_stats']['avg_engagement']:.2f}")
OSINT Investigation Workflows
Target User Investigation
#!/usr/bin/env python3
# twitter-user-investigation.py
import twint
import pandas as pd
import json
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
class TwitterUserInvestigation:
    def __init__(self, username):
        self.username = username
        self.tweets_df = None
        self.followers_df = None
        self.following_df = None
        self.results = {}

    def collect_user_data(self):
        """Collect comprehensive user data"""
        print(f"Investigating Twitter user: {self.username}")
        # Collect tweets
        self.collect_tweets()
        # Collect followers (limited)
        self.collect_followers()
        # Collect following (limited)
        self.collect_following()
        # Analyze collected data
        self.analyze_data()

    def collect_tweets(self, limit=1000):
        """Collect user tweets"""
        print("Collecting tweets...")
        c = twint.Config()
        c.Username = self.username
        c.Limit = limit
        c.Store_pandas = True
        c.Hide_output = True
        try:
            twint.run.Search(c)
            self.tweets_df = twint.storage.panda.Tweets_df
            print(f"Collected {len(self.tweets_df)} tweets")
        except Exception as e:
            print(f"Error collecting tweets: {e}")

    def collect_followers(self, limit=100):
        """Collect user followers"""
        print("Collecting followers...")
        c = twint.Config()
        c.Username = self.username
        c.Limit = limit
        c.Store_pandas = True
        c.Hide_output = True
        try:
            twint.run.Followers(c)
            self.followers_df = twint.storage.panda.Follow_df
            print(f"Collected {len(self.followers_df)} followers")
        except Exception as e:
            print(f"Error collecting followers: {e}")

    def collect_following(self, limit=100):
        """Collect users being followed"""
        print("Collecting following...")
        c = twint.Config()
        c.Username = self.username
        c.Limit = limit
        c.Store_pandas = True
        c.Hide_output = True
        try:
            twint.run.Following(c)
            self.following_df = twint.storage.panda.Follow_df
            print(f"Collected {len(self.following_df)} following")
        except Exception as e:
            print(f"Error collecting following: {e}")

    def analyze_data(self):
        """Analyze collected data"""
        if self.tweets_df is not None and not self.tweets_df.empty:
            self.results = {
                'basic_stats': self.get_basic_stats(),
                'temporal_analysis': self.analyze_temporal_patterns(),
                'content_analysis': self.analyze_content(),
                'network_analysis': self.analyze_network(),
                'behavioral_patterns': self.analyze_behavior()
            }
    def get_basic_stats(self):
        """Get basic statistics"""
        return {
            'total_tweets': len(self.tweets_df),
            'date_range': {
                'first_tweet': self.tweets_df['date'].min(),
                'last_tweet': self.tweets_df['date'].max()
            },
            'engagement': {
                'total_likes': self.tweets_df['likes_count'].sum(),
                'total_retweets': self.tweets_df['retweets_count'].sum(),
                'total_replies': self.tweets_df['replies_count'].sum(),
                'avg_likes': self.tweets_df['likes_count'].mean(),
                'avg_retweets': self.tweets_df['retweets_count'].mean()
            }
        }

    def analyze_temporal_patterns(self):
        """Analyze posting time patterns"""
        # Convert to datetime
        self.tweets_df['datetime'] = pd.to_datetime(self.tweets_df['date'] + ' ' + self.tweets_df['time'])
        self.tweets_df['hour'] = self.tweets_df['datetime'].dt.hour
        self.tweets_df['day_of_week'] = self.tweets_df['datetime'].dt.day_name()
        return {
            'hourly_pattern': self.tweets_df['hour'].value_counts().to_dict(),
            'daily_pattern': self.tweets_df['day_of_week'].value_counts().to_dict(),
            'most_active_hour': self.tweets_df['hour'].mode().iloc[0],
            'most_active_day': self.tweets_df['day_of_week'].mode().iloc[0],
            'posting_frequency': len(self.tweets_df) / max(1, (self.tweets_df['datetime'].max() - self.tweets_df['datetime'].min()).days)
        }

    def analyze_content(self):
        """Analyze tweet content"""
        # Extract hashtags, mentions, and URLs
        all_hashtags = []
        all_mentions = []
        all_urls = []
        for _, row in self.tweets_df.iterrows():
            if row['hashtags']:
                all_hashtags.extend(row['hashtags'])
            if row['mentions']:
                all_mentions.extend(row['mentions'])
            if row['urls']:
                all_urls.extend(row['urls'])
        return {
            'top_hashtags': pd.Series(all_hashtags).value_counts().head(10).to_dict(),
            'top_mentions': pd.Series(all_mentions).value_counts().head(10).to_dict(),
            'url_domains': self.extract_domains(all_urls),
            'tweet_length_stats': {
                'avg_length': self.tweets_df['tweet'].str.len().mean(),
                'max_length': self.tweets_df['tweet'].str.len().max(),
                'min_length': self.tweets_df['tweet'].str.len().min()
            }
        }
    def extract_domains(self, urls):
        """Extract domains from URLs"""
        from urllib.parse import urlparse
        domains = []
        for url in urls:
            try:
                domain = urlparse(url).netloc
                if domain:
                    domains.append(domain)
            except Exception:
                continue
        return pd.Series(domains).value_counts().head(10).to_dict()

    def analyze_network(self):
        """Analyze network connections"""
        network_data = {}
        if self.followers_df is not None:
            network_data['followers_count'] = len(self.followers_df)
        if self.following_df is not None:
            network_data['following_count'] = len(self.following_df)
        # Analyze interaction patterns
        if self.tweets_df is not None:
            reply_users = []
            for mentions in self.tweets_df['mentions'].dropna():
                if mentions:
                    reply_users.extend(mentions)
            network_data['frequent_interactions'] = pd.Series(reply_users).value_counts().head(10).to_dict()
        return network_data

    def analyze_behavior(self):
        """Analyze behavioral patterns"""
        if self.tweets_df is None:
            return {}
        # Retweet vs. original content ratio
        retweet_count = self.tweets_df['tweet'].str.startswith('RT @').sum()
        original_count = len(self.tweets_df) - retweet_count
        # Reply patterns
        reply_count = self.tweets_df['tweet'].str.startswith('@').sum()
        return {
            'content_type_distribution': {
                'original_tweets': original_count,
                'retweets': retweet_count,
                'replies': reply_count
            },
            'retweet_ratio': retweet_count / len(self.tweets_df),
            'engagement_patterns': {
                'high_engagement_threshold': self.tweets_df['likes_count'].quantile(0.9),
                'viral_tweets': len(self.tweets_df[self.tweets_df['likes_count'] > self.tweets_df['likes_count'].quantile(0.95)])
            }
        }

    def generate_report(self):
        """Generate investigation report"""
        report = {
            'investigation_target': self.username,
            'investigation_date': datetime.now().isoformat(),
            'data_summary': {
                'tweets_collected': len(self.tweets_df) if self.tweets_df is not None else 0,
                'followers_collected': len(self.followers_df) if self.followers_df is not None else 0,
                'following_collected': len(self.following_df) if self.following_df is not None else 0
            },
            'analysis_results': self.results
        }
        # Save to JSON
        with open(f'twitter_investigation_{self.username}_{datetime.now().strftime("%Y%m%d")}.json', 'w') as f:
            json.dump(report, f, indent=2, default=str)
        # Generate HTML report
        self.generate_html_report(report)
        return report
    def generate_html_report(self, report):
        """Generate HTML investigation report"""
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Twitter Investigation Report - {self.username}</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 20px; }}
                .section {{ margin: 20px 0; border: 1px solid #ccc; padding: 15px; }}
                .section h2 {{ color: #333; margin-top: 0; }}
                table {{ border-collapse: collapse; width: 100%; }}
                th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
                th {{ background-color: #f2f2f2; }}
                .metric {{ display: inline-block; margin: 10px; padding: 10px; background: #f9f9f9; border-radius: 5px; }}
            </style>
        </head>
        <body>
            <h1>Twitter OSINT Investigation Report</h1>
            <div class="section">
                <h2>Investigation Summary</h2>
                <div class="metric"><strong>Target:</strong> @{self.username}</div>
                <div class="metric"><strong>Date:</strong> {report['investigation_date']}</div>
                <div class="metric"><strong>Tweets Analyzed:</strong> {report['data_summary']['tweets_collected']}</div>
            </div>
        """
        if 'basic_stats' in self.results:
            stats = self.results['basic_stats']
            html_content += f"""
            <div class="section">
                <h2>Basic Statistics</h2>
                <div class="metric"><strong>Total Tweets:</strong> {stats['total_tweets']}</div>
                <div class="metric"><strong>Total Likes:</strong> {stats['engagement']['total_likes']}</div>
                <div class="metric"><strong>Total Retweets:</strong> {stats['engagement']['total_retweets']}</div>
                <div class="metric"><strong>Average Likes:</strong> {stats['engagement']['avg_likes']:.2f}</div>
            </div>
            """
        if 'content_analysis' in self.results:
            content = self.results['content_analysis']
            html_content += """
            <div class="section">
                <h2>Content Analysis</h2>
                <h3>Top Hashtags</h3>
                <table>
                    <tr><th>Hashtag</th><th>Count</th></tr>
            """
            for hashtag, count in list(content['top_hashtags'].items())[:10]:
                html_content += f"<tr><td>#{hashtag}</td><td>{count}</td></tr>"
            html_content += """
                </table>
                <h3>Top Mentions</h3>
                <table>
                    <tr><th>User</th><th>Count</th></tr>
            """
            for user, count in list(content['top_mentions'].items())[:10]:
                html_content += f"<tr><td>@{user}</td><td>{count}</td></tr>"
            html_content += "</table></div>"
        html_content += """
        </body>
        </html>
        """
        with open(f'twitter_investigation_{self.username}_{datetime.now().strftime("%Y%m%d")}.html', 'w') as f:
            f.write(html_content)
def main():
    import sys
    if len(sys.argv) != 2:
        print("Usage: python3 twitter-user-investigation.py <username>")
        sys.exit(1)
    username = sys.argv[1].replace('@', '')  # Remove @ if present
    investigation = TwitterUserInvestigation(username)
    investigation.collect_user_data()
    report = investigation.generate_report()
    print(f"\nInvestigation completed for @{username}")
    print(f"Report saved as: twitter_investigation_{username}_{datetime.now().strftime('%Y%m%d')}.json")
    print(f"HTML report saved as: twitter_investigation_{username}_{datetime.now().strftime('%Y%m%d')}.html")

if __name__ == "__main__":
    main()
Hashtag and Trend Analysis
#!/usr/bin/env python3
# twitter-hashtag-analysis.py
import twint
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from collections import Counter
import networkx as nx
class HashtagAnalysis:
    def __init__(self):
        self.tweets_df = None
        self.hashtag_network = None

    def analyze_hashtag(self, hashtag, days_back=7, limit=1000):
        """Analyze usage of a specific hashtag"""
        print(f"Analyzing hashtag: #{hashtag}")
        # Configure search
        c = twint.Config()
        c.Search = f"#{hashtag}"
        c.Limit = limit
        c.Store_pandas = True
        c.Hide_output = True
        # Set date range
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days_back)
        c.Since = start_date.strftime("%Y-%m-%d")
        c.Until = end_date.strftime("%Y-%m-%d")
        # Run search
        twint.run.Search(c)
        self.tweets_df = twint.storage.panda.Tweets_df
        if self.tweets_df is not None and not self.tweets_df.empty:
            analysis = {
                'hashtag': hashtag,
                'total_tweets': len(self.tweets_df),
                'unique_users': self.tweets_df['username'].nunique(),
                'date_range': f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}",
                'engagement_stats': self.calculate_engagement_stats(),
                'top_users': self.get_top_users(),
                'related_hashtags': self.get_related_hashtags(),
                'temporal_patterns': self.analyze_temporal_patterns(),
                'influence_metrics': self.calculate_influence_metrics()
            }
            return analysis
        else:
            print(f"No tweets found for #{hashtag}")
            return None
    def calculate_engagement_stats(self):
        """Calculate engagement statistics"""
        return {
            'total_likes': self.tweets_df['likes_count'].sum(),
            'total_retweets': self.tweets_df['retweets_count'].sum(),
            'total_replies': self.tweets_df['replies_count'].sum(),
            'avg_likes': self.tweets_df['likes_count'].mean(),
            'avg_retweets': self.tweets_df['retweets_count'].mean(),
            'avg_replies': self.tweets_df['replies_count'].mean(),
            'engagement_rate': (self.tweets_df['likes_count'] + self.tweets_df['retweets_count'] + self.tweets_df['replies_count']).mean()
        }

    def get_top_users(self, top_n=10):
        """Get top users by tweet count and engagement"""
        user_stats = self.tweets_df.groupby('username').agg({
            'tweet': 'count',
            'likes_count': 'sum',
            'retweets_count': 'sum',
            'replies_count': 'sum'
        }).reset_index()
        user_stats['total_engagement'] = user_stats['likes_count'] + user_stats['retweets_count'] + user_stats['replies_count']
        return {
            'by_tweet_count': user_stats.nlargest(top_n, 'tweet')[['username', 'tweet']].to_dict('records'),
            'by_engagement': user_stats.nlargest(top_n, 'total_engagement')[['username', 'total_engagement']].to_dict('records')
        }

    def get_related_hashtags(self, top_n=20):
        """Get hashtags that appear alongside the target hashtag"""
        all_hashtags = []
        for hashtags in self.tweets_df['hashtags'].dropna():
            if hashtags:
                all_hashtags.extend(hashtags)
        hashtag_counts = Counter(all_hashtags)
        return hashtag_counts.most_common(top_n)

    def analyze_temporal_patterns(self):
        """Analyze temporal posting patterns"""
        self.tweets_df['datetime'] = pd.to_datetime(self.tweets_df['date'] + ' ' + self.tweets_df['time'])
        self.tweets_df['hour'] = self.tweets_df['datetime'].dt.hour
        self.tweets_df['day'] = self.tweets_df['datetime'].dt.date
        return {
            'hourly_distribution': self.tweets_df['hour'].value_counts().sort_index().to_dict(),
            'daily_volume': self.tweets_df['day'].value_counts().sort_index().to_dict(),
            'peak_hour': self.tweets_df['hour'].mode().iloc[0],
            'peak_day': self.tweets_df['day'].value_counts().index[0].strftime('%Y-%m-%d')
        }

    def calculate_influence_metrics(self):
        """Calculate influence and reach metrics"""
        # Identify influential tweets (top 10% by engagement)
        engagement_threshold = self.tweets_df['likes_count'].quantile(0.9)
        influential_tweets = self.tweets_df[self.tweets_df['likes_count'] >= engagement_threshold]
        return {
            'influential_tweets_count': len(influential_tweets),
            'influential_users': influential_tweets['username'].unique().tolist(),
            'viral_threshold': engagement_threshold,
            'reach_estimate': self.tweets_df['retweets_count'].sum() * 100  # Rough estimate
        }
    def create_hashtag_network(self, min_cooccurrence=2):
        """Create a network of co-occurring hashtags"""
        hashtag_pairs = []
        for hashtags in self.tweets_df['hashtags'].dropna():
            if hashtags and len(hashtags) > 1:
                # Create pairs of hashtags that appear together
                for i in range(len(hashtags)):
                    for j in range(i + 1, len(hashtags)):
                        pair = tuple(sorted([hashtags[i], hashtags[j]]))
                        hashtag_pairs.append(pair)
        # Count co-occurrences
        pair_counts = Counter(hashtag_pairs)
        # Create network graph
        G = nx.Graph()
        for (hashtag1, hashtag2), count in pair_counts.items():
            if count >= min_cooccurrence:
                G.add_edge(hashtag1, hashtag2, weight=count)
        self.hashtag_network = G
        return G

    def visualize_hashtag_network(self, output_file="hashtag_network.png"):
        """Visualize the hashtag co-occurrence network"""
        if self.hashtag_network is None:
            self.create_hashtag_network()
        plt.figure(figsize=(12, 8))
        # Scale node sizes by degree
        node_sizes = [self.hashtag_network.degree(node) * 100 for node in self.hashtag_network.nodes()]
        # Draw network
        pos = nx.spring_layout(self.hashtag_network, k=1, iterations=50)
        nx.draw(self.hashtag_network, pos,
                node_size=node_sizes,
                node_color='lightblue',
                font_size=8,
                font_weight='bold',
                with_labels=True,
                edge_color='gray',
                alpha=0.7)
        plt.title("Hashtag Co-occurrence Network")
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(output_file, dpi=300, bbox_inches='tight')
        plt.close()
        print(f"Network visualization saved as: {output_file}")
def main():
    import sys
    if len(sys.argv) < 2:
        print("Usage: python3 twitter-hashtag-analysis.py <hashtag> [days_back] [limit]")
        sys.exit(1)
    hashtag = sys.argv[1].replace('#', '')  # Remove # if present
    days_back = int(sys.argv[2]) if len(sys.argv) > 2 else 7
    limit = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
    analyzer = HashtagAnalysis()
    analysis = analyzer.analyze_hashtag(hashtag, days_back, limit)
    if analysis:
        print(f"\nHashtag Analysis Results for #{hashtag}")
        print("=" * 50)
        print(f"Total tweets: {analysis['total_tweets']}")
        print(f"Unique users: {analysis['unique_users']}")
        print(f"Average engagement: {analysis['engagement_stats']['engagement_rate']:.2f}")
        print(f"Peak hour: {analysis['temporal_patterns']['peak_hour']}:00")
        # Create network visualization
        analyzer.visualize_hashtag_network(f"hashtag_network_{hashtag}.png")
        # Save detailed results
        import json
        with open(f"hashtag_analysis_{hashtag}_{datetime.now().strftime('%Y%m%d')}.json", 'w') as f:
            json.dump(analysis, f, indent=2, default=str)
        print(f"\nDetailed analysis saved as: hashtag_analysis_{hashtag}_{datetime.now().strftime('%Y%m%d')}.json")

if __name__ == "__main__":
    main()
Best Practices and OPSEC
Operational Security
#!/bin/bash
# twint-opsec-setup.sh
echo "Twint OPSEC configuración"
echo "========================"
# Use VPN or proxy
echo "1. Network Security:"
echo " □ Configure VPN conexión"
echo " □ Use SOCKS proxy if needed"
echo " □ Rotate IP addresses periodically"
# Rate limiting
echo -e "\n2. Rate Limiting:"
echo " □ Add delays between requests"
echo " □ Limit concurrent searches"
echo " □ Monitor for rate limiting"
# Data security
echo -e "\n3. Data Security:"
echo " □ Encrypt stored data"
echo " □ Use secure file permissions"
echo " □ Regular data cleanup"
# Legal compliance
echo -e "\n4. Legal Compliance:"
echo " □ Verify investigation scope"
echo " □ Document methodology"
echo " □ Respect privacy laws"
Rate Limiting and Delays
import twint
import time
import random

def safe_twint_search(config, delay_range=(1, 3)):
    """Run a Twint search with a random delay"""
    try:
        # Add random delay
        delay = random.uniform(delay_range[0], delay_range[1])
        time.sleep(delay)
        # Run search
        twint.run.Search(config)
        return True
    except Exception as e:
        print(f"Search failed: {e}")
        # Longer delay on failure
        time.sleep(random.uniform(5, 10))
        return False

def batch_user_analysis(usernames, delay_range=(2, 5)):
    """Analyze multiple users with delays between requests"""
    results = {}
    for username in usernames:
        print(f"Analyzing @{username}")
        c = twint.Config()
        c.Username = username
        c.Limit = 100
        c.Store_pandas = True
        c.Hide_output = True
        if safe_twint_search(c, delay_range):
            if twint.storage.panda.Tweets_df is not None:
                results[username] = len(twint.storage.panda.Tweets_df)
            else:
                results[username] = 0
        else:
            results[username] = "Failed"
        # Clear storage for next user
        twint.storage.panda.Tweets_df = None
    return results
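A short usage sketch for the batch helper above (the account names are placeholders):
# Example usage with hypothetical account names
targets = ["example_user1", "example_user2"]
summary = batch_user_analysis(targets, delay_range=(2, 5))
for username, tweet_count in summary.items():
    print(f"@{username}: {tweet_count} tweets collected")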
Troubleshooting
Common Issues and Solutions
# Issue: No tweets returned
# Solution: Check whether the user exists and has public tweets
twint -u username --debug
# Issue: Rate limiting
# Solution: Add delays and reduce request frequency
twint -u username --limit 50
# Issue: SSL/TLS errors
# Solution: Update certificates or disable SSL verification
pip install --upgrade certifi
# Issue: Pandas storage not working
# Solution: Clear storage and reinitialize
python3 -c "import twint; twint.storage.panda.Tweets_df = None"
Debug and Logging
import twint
import logging

# Enable debug logging
logging.basicConfig(level=logging.DEBUG)

# Configure with debug mode
c = twint.Config()
c.Username = "username"
c.Debug = True
c.Verbose = True

# Run with error handling
try:
    twint.run.Search(c)
except Exception as e:
    print(f"Error: {e}")
    import traceback
    traceback.print_exc()
Resources
- Twint GitHub Repository
- Twint Documentation
- Twitter OSINT Techniques
- Social Media Intelligence Guide
- Python Data Analysis with Pandas
This cheat sheet provides comprehensive guidance for using Twint in Twitter OSINT investigations. Always ensure proper authorization and legal compliance before conducting any social media intelligence gathering.