library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(descr) # Describe attributes of objects/variables
# Open the help page for the diamonds dataset (interactive use).
?diamonds
# Class of the `cut` column. A bare `cut` is not a column at this point
# (no data frame is in scope), so it would resolve to the base function
# cut() and class() would report "function"; qualify it with the data frame.
class(diamonds$cut)
## [1] "ordered" "factor"
# Class of every column. The class vectors differ in length, so the result
# is a named list; lapply() returns that list directly instead of relying on
# sapply()'s input-dependent simplification.
lapply(diamonds, class)
## $carat
## [1] "numeric"
##
## $cut
## [1] "ordered" "factor"
##
## $color
## [1] "ordered" "factor"
##
## $clarity
## [1] "ordered" "factor"
##
## $depth
## [1] "numeric"
##
## $table
## [1] "numeric"
##
## $price
## [1] "integer"
##
## $x
## [1] "numeric"
##
## $y
## [1] "numeric"
##
## $z
## [1] "numeric"
# Frequency table of diamond cut, without the accompanying bar plot.
# with() evaluates the call inside `diamonds` for this one expression only,
# so the columns are not attach()ed to the search path, where they would
# mask functions such as cut() and table() for the rest of the session.
# NOTE(review): if code elsewhere relied on the attached columns, refer to
# them via diamonds$<column> or with(diamonds, ...) instead.
with(diamonds, freq(cut, plot = FALSE))
## cut
## Frequency Percent Cum Percent
## Fair 1610 2.985 2.985
## Good 4906 9.095 12.080
## Very Good 12082 22.399 34.479
## Premium 13791 25.567 60.046
## Ideal 21551 39.954 100.000
## Total 53940 100.000
# Same frequency table of diamond cut, this time also drawing the bar plot.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
with(diamonds, freq(cut, plot = TRUE))
## cut
## Frequency Percent Cum Percent
## Fair 1610 2.985 2.985
## Good 4906 9.095 12.080
## Very Good 12082 22.399 34.479
## Premium 13791 25.567 60.046
## Ideal 21551 39.954 100.000
## Total 53940 100.000
# Tibble version of the diamonds data, used by the histogram examples.
diam_tb <- diamonds %>%
  as_tibble()
# Look at the class vector: how does it differ from a plain data frame's?
class(diam_tb)
## [1] "tbl_df" "tbl" "data.frame"
# Bar chart of the number of diamonds per cut.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut))

# Bar chart of the share of diamonds per cut, with the y axis in percent.
# after_stat() refers to the `count` variable computed by the bar stat and
# replaces the deprecated ..count.. notation.
ggplot(data = diamonds, aes(cut)) +
  geom_bar(mapping = aes(y = after_stat(count / sum(count)))) +
  theme_bw() +
  scale_y_continuous(labels = scales::percent) +
  ylab("Percent")
# Export the percentage bar chart of diamond cut as PDF, PNG and SVG.

# Directory the figures are written to. It is passed to each device through
# file.path() rather than setwd(), so the session's working directory is left
# untouched. The path is machine-specific; adjust it before running elsewhere.
out_dir <- "/Users/dasha/Desktop"

library("ggplot2")

# Build the plot once and reuse it for every output format.
# after_stat() replaces the deprecated ..count.. notation.
cut_pct_plot <- ggplot(data = diamonds, aes(cut)) +
  geom_bar(mapping = aes(y = after_stat(count / sum(count)))) +
  theme_bw() +
  scale_y_continuous(labels = scales::percent) +
  ylab("Percent")

## pdf
pdf(file.path(out_dir, "barplot.pdf"))
# Explicit print(): ggplot objects are only drawn when printed, and
# auto-printing does not happen when the script is run through source().
print(cut_pct_plot)
dev.off()
## quartz_off_screen
## 2
## png
png(file.path(out_dir, "barplot.png"))
print(cut_pct_plot)
dev.off()
## quartz_off_screen
## 2
## svg
svg(file.path(out_dir, "barplot.svg"))
print(cut_pct_plot)
dev.off()
## quartz_off_screen
## 2
# Histogram of diamond weight (carat) with wide 0.5-carat bins.
ggplot(diam_tb, aes(x = carat)) +
  geom_histogram(binwidth = 0.5)

# Same variable with narrow 0.03-carat bins, a custom fill colour,
# a title and axis labels.
ggplot(diam_tb, aes(x = carat)) +
  geom_histogram(binwidth = 0.03, fill = "#FF6666") +
  labs(
    title = "Quantity of diamonds according to weight",
    x = "Diamond Weight",
    y = "Count"
  ) +
  theme_classic()
# Keep only the diamonds lighter than 3 carats.
smaller <- filter(diamonds, carat < 3)

# Histogram of carat for the filtered data, 0.1-carat bins.
ggplot(smaller, aes(x = carat)) +
  geom_histogram(binwidth = 0.1)

# Frequency polygons of carat, one coloured line per cut.
ggplot(smaller, aes(x = carat, colour = cut)) +
  geom_freqpoly(binwidth = 0.1) +
  theme_bw()
library(RColorBrewer)
# Scatter plot of the faithful dataset: eruptions on x, waiting on y,
# drawn with blue points.
ggplot(faithful, aes(x = eruptions, y = waiting)) +
  geom_point(color = "blue") +
  labs(
    title = "Eruptions by time of waiting",
    x = "Eruptions",
    y = "Waiting"
  ) +
  theme_classic()