# Prerequisite: create a plain-text file with numbers and save it
# (this script reads it from "R/Daten.txt").
# Read the data; the path is either relative to the current working directory
# or a complete absolute path:
#   Linux, relative:   "Unterordner1/Unterordner2/Datei.txt"
#   Linux, absolute:   "/Ordner/Unterordner1/Unterordner2/Datei.txt"
#   Windows, relative: "Unterordner1\\Unterordner2\\Datei.txt"
#   Windows, absolute: "C:\\Ordner\\Unterordner1\\Unterordner2\\Datei.txt"
v <- scan("R/Daten.txt")
v
# Write R data to a text file, values separated by "; ".
# The target path follows the same rules as above.
cat(v, file = "fakt.txt", sep = "; ")

# Directory that holds the lottery data sets. Change this single line when the
# data live somewhere else.
lottery.dir <- "/u/Lehre/R-Kurse/SS16/Datensaetze/Datensaetze"

# Read each data set as a numeric vector; file.path() joins directory and
# file name with "/".
lottery.number  <- scan(file.path(lottery.dir, "lottery.number"))
lottery.payoff  <- scan(file.path(lottery.dir, "lottery.payoff"))
lottery2.number <- scan(file.path(lottery.dir, "lottery2.number"))
lottery3.number <- scan(file.path(lottery.dir, "lottery3.number"))
lottery2.payoff <- scan(file.path(lottery.dir, "lottery2.payoff"))
lottery3.payoff <- scan(file.path(lottery.dir, "lottery3.payoff"))

# Scatter plot of payoff against number, drawn with small filled dots (pch = 20).
plot(lottery.number, lottery.payoff, pch = 20)


# Histogram with default class boundaries.
hist(lottery.number)
# Number of bars (equidistant partition of the value range); a single number
# is only a suggestion to hist().
hist(lottery.number, breaks = 20)
# Explicit class boundaries:
hist(lottery.number, breaks = c(0, 100, 200, 1000))
# freq = FALSE: plot densities (total area 1) instead of counts;
# density ~ density of the shading lines:
hist(lottery.number, freq = FALSE, density = 30)
# col ~ colour of the bars. Explicit breaks must span the whole range of the
# data, otherwise hist() stops with an error.
hist(lottery.payoff, breaks = seq(0, 870, 30), col = 3)

# Several figures on one page: mfrow = c(number of rows, number of columns).
# par() returns the previous settings; keep them so the layout can be restored
# afterwards without closing the graphics device.
old.par <- par(mfrow = c(3, 1))
# Three histograms stacked vertically (3 rows, 1 column).
hist(lottery.payoff)
hist(lottery2.payoff)
hist(lottery3.payoff)
# Three histograms side by side (1 row, 3 columns).
par(mfrow = c(1, 3))
hist(lottery.payoff)
hist(lottery2.payoff)
hist(lottery3.payoff)
# Restore the previous layout. Unlike dev.off(), this keeps the device and the
# plots drawn so far.
par(old.par)


x <- c(44, 48, 49, 52, 49, 46, 52, 38, 53, 45, 49, 39, 37, 33, 47)
# Split the data into 3 classes (equidistant partition of the value range).
cut(x, 3)
# Explicit class boundaries; values outside (40, 60] become NA.
cut(x, c(40, 50, 60))
cut(x, c(30, 40, 50, 60), labels = c("30er", "40er", "50er"))
# labels = FALSE: classes are numbered 1, 2, 3, ... instead of being labelled.
cut(x, c(30, 40, 50, 60), labels = FALSE)
as.numeric(cut(x, c(30, 40, 50, 60)))

# Count frequencies in a vector of positive integers:
?tabulate
tabulate(c(1, 2, 1, 3, 4, 5, 4, 1))
# nbins ~ largest value whose frequency is counted:
tabulate(c(1, 2, 1, 3, 4, 5, 4, 1), 3)
tabulate(c(1, 2, 1, 3, 4, 5, 4, 1), 6)
# Class frequencies of x. Without nbins, tabulate() stops at the highest class
# that actually occurs; passing the number of levels keeps one count per class
# even when the upper classes are empty.
x.classes <- cut(x, c(30, 40, 50, 60))
tabulate(x.classes, nbins = nlevels(x.classes))

# Bar plots:
# 100 standard-normal random values, one bar each.
barplot(rnorm(100))
h <- c(1, 4, 2, 10, -3)  # bar heights
w <- c(4, 1, 1, 3, 1)    # bar widths
barplot(h, width = w)
namen <- c("Pos1", "Pos2", "Pos3", "Pos4", "Pos5")
# names.arg is spelled out in full instead of relying on partial matching of
# "names"; space = 0 draws the bars without gaps.
barplot(h, width = 1, names.arg = namen, space = 0)
# density gives the shading-line density per bar; c(2, 8) is recycled over the bars.
barplot(h, width = 1, names.arg = namen, space = 0, density = c(2, 8))

x <- c(1.2, NA, 3.5, 1.4, 7.9, 1.1)
# sort() removes NA and NaN.
sort(x)
rev(sort(x))
sort(x, decreasing = TRUE)
# x itself is not modified by sort().
x
# sort.list() returns the permutation of the indices that produces the sorted
# vector; the index of the NA comes last.
sort.list(x)
x[sort.list(x)]
sort.list(x, decreasing = TRUE)
x[sort.list(x, decreasing = TRUE)]
rank(x)
# median() yields NA as soon as x contains an NA, unless na.rm = TRUE.
median(x)
median(x, na.rm = TRUE)

# Histogram of the payoffs; breaks = 25 is a suggested number of classes.
hist(lottery.payoff, breaks = 25)
mean(lottery.payoff)
median(lottery.payoff, na.rm = TRUE)
# Deciles (0%, 10%, ..., 100% quantiles) of the payoffs.
quantile(lottery.payoff, probs = seq(0, 1, 0.1))

# Box plots:
boxplot(lottery.payoff)
# Two samples side by side for comparison.
boxplot(lottery.payoff, lottery2.payoff)

# QQ plots:
# Quantiles of one sample against those of another, with the line y = x
# (intercept 0, slope 1) as reference.
qqplot(lottery.number, lottery2.number)
abline(0, 1)
# Payoff quantiles against the quantiles of the standard normal distribution.
qqnorm(lottery.payoff)
# Standardise the payoffs (subtract the mean, divide by the standard deviation)
# so that the reference line y = x applies.
lottery.normiert <- (lottery.payoff - mean(lottery.payoff)) / sd(lottery.payoff)
qqnorm(lottery.normiert)
abline(0, 1)