## About
This is a tool to generate dictionaries based on Twitter searches, inspired by this blog.
Compared to the original blog post, this version has a few differences.
## Example usage
$ ./tweetsearch.sh "sveriges riksdag" "svenska regeringen" | head -n20
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 77594 100 77594 0 0 64952 0 0:00:01 0:00:01 --:--:-- 79665
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 41749 100 41749 0 0 45874 0 --:--:-- --:--:-- --:--:-- 61758
sveriges
riksdag
svpol
bjöd
regeringen
svenska
kritiserad
judehatare
kaplan
mehmet
riksdagsislamist
yvonne
ridley
ytterligare
mehmetkaplan
miljöpartiet
jerlerup
värre
soppan
riksdagen
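## Requirements
The script needs `bash`, `curl`, `jshon`, the standard Unix text utilities, and a stop-word list in a file named `swedish_stopwords`.
## Code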
It’s not very advanced or special in any way:
#!/bin/bash
#
# tweetsearch.sh - build a word list from Twitter search results.
#
# Usage: tweetsearch.sh term1 [term2 ...]
#
# For every search term the script downloads the matching tweets, splits
# them into lower-cased words and collects them. It then prints the unique
# words to stdout, most frequent first, leaving out stop words and words
# shorter than three characters.
#
# Requires: curl, jshon. A stop-word list named "swedish_stopwords" is looked
# up in the working directory first and then next to this script; when it is
# missing the stop-word filter is skipped with a warning.
#
# Exit status:
#   65  no search terms were given
#   1   a required tool is missing or no temporary directory could be created
#   otherwise the status of the final length filter (non-zero when no words
#   are left to print)

set -u

# NOTE(review): this is the endpoint the script has always queried; confirm
# that it is still reachable before relying on the results.
readonly SEARCH_URL="http://search.twitter.com/search.json"
readonly STOPWORDS_NAME="swedish_stopwords"

# Scratch directory; set in main and removed by the EXIT trap.
workdir=""

# Prints a message to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Prints a message to stderr and aborts with status 1.
die() {
  warn "$@"
  exit 1
}

# Removes the scratch directory, if one was created.
cleanup() {
  if [[ -n "$workdir" ]]; then
    rm -rf -- "$workdir"
  fi
}

#######################################
# Turns a search response into candidate words.
# Inputs:  search response JSON on stdin
# Outputs: one word per line on stdout
#######################################
extract_words() {
  # jshon prints each tweet text as a quoted JSON string; cut keeps what is
  # between the first pair of double quotes.
  jshon -e results -a -e text \
    | cut -d'"' -f2 \
    | tr ' ' '\n' \
    | tr 'A-Z' 'a-z' \
    | sed -e 's/^#//' -e 's/^@//' \
    | grep -Ev '^https?://' \
    | tr -d ':,.!?'
  # Stage by stage: split on spaces, map A-Z to a-z, strip one leading '#'
  # and one leading '@' (the hashtag / user name itself is kept), drop links,
  # delete common punctuation.
}

#######################################
# Locates the stop-word list.
# Outputs: path of the list on stdout
# Returns: 0 when a readable list was found, 1 otherwise
#######################################
find_stopwords() {
  local dir
  for dir in . "$(dirname -- "$0")"; do
    if [[ -r "${dir}/${STOPWORDS_NAME}" ]]; then
      printf '%s\n' "${dir}/${STOPWORDS_NAME}"
      return 0
    fi
  done
  return 1
}

# Filters stdin to stdout, dropping lines that match a stop-word pattern as a
# whole line. Passes everything through when no list is available.
filter_stopwords() {
  local list
  if list=$(find_stopwords); then
    grep -vx --file="$list"
  else
    warn "warning: ${STOPWORDS_NAME} not found; stop words are not removed"
    cat
  fi
}

#######################################
# Prints the unique words of a file, most frequent first.
# Arguments: $1 - file with one word per line
# Outputs:   ranked words on stdout
# Returns:   status of the final length filter
#######################################
rank_words() {
  # The sed stage strips the count that 'uniq -c' prepends without relying
  # on a fixed column width.
  sort -- "$1" \
    | uniq -c \
    | sort -nr \
    | sed 's/^ *[0-9]* //' \
    | filter_stopwords \
    | grep -Ev '^.{0,2}$'
}

main() {
  if (( $# == 0 )); then
    printf 'Usage: %s term1 term2\n' "${0##*/}" >&2
    exit 65
  fi

  local tool
  for tool in curl jshon; do
    command -v "$tool" > /dev/null || die "error: required tool '${tool}' not found"
  done

  workdir=$(mktemp -d) || die "error: cannot create a temporary directory"
  trap cleanup EXIT

  local response="${workdir}/response.json"
  local words="${workdir}/words"
  : > "$words"

  local term
  for term in "$@"; do
    # curl's progress meter is left on (it goes to stderr); -f makes HTTP
    # errors count as failures so no error page is handed to jshon.
    if ! curl -f -G \
      --data-urlencode "q=${term}" \
      --data-urlencode "rpp=500" \
      "$SEARCH_URL" > "$response"; then
      warn "warning: search for '${term}' failed; skipping"
      continue
    fi
    extract_words < "$response" >> "$words"
  done

  rank_words "$words"
}

main "$@"
2013-04-13